summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Licence75
-rw-r--r--Makefile93
-rw-r--r--Makefile.bor76
-rw-r--r--Makefile.dos72
-rw-r--r--Readme54
-rw-r--r--assemble.c945
-rw-r--r--assemble.h17
-rw-r--r--disasm.c667
-rw-r--r--disasm.h18
-rw-r--r--float.c389
-rw-r--r--float.h16
-rw-r--r--insns.dat984
-rw-r--r--insns.h66
-rw-r--r--insns.pl160
-rw-r--r--internal.doc268
-rw-r--r--labels.c292
-rw-r--r--labels.h17
-rw-r--r--lcc/Readme57
-rw-r--r--lcc/bind.c23
-rw-r--r--lcc/lin-aout.c44
-rw-r--r--lcc/lin-elf.c45
-rw-r--r--lcc/x86nasm.md703
-rw-r--r--misc/magic6
-rw-r--r--misc/nasm.sl305
-rw-r--r--names.c79
-rw-r--r--nasm.c648
-rw-r--r--nasm.doc996
-rw-r--r--nasm.h443
-rw-r--r--nasmlib.c488
-rw-r--r--nasmlib.h115
-rw-r--r--ndisasm.c270
-rw-r--r--ndisasm.doc199
-rw-r--r--outaout.c466
-rw-r--r--outas86.c548
-rw-r--r--outbin.c303
-rw-r--r--outcoff.c611
-rw-r--r--outdbg.c138
-rw-r--r--outelf.c620
-rw-r--r--outform.c42
-rw-r--r--outform.h167
-rw-r--r--outobj.c1229
-rw-r--r--outrdf.c467
-rw-r--r--parser.c1306
-rw-r--r--parser.h18
-rw-r--r--rdoff/Makefile43
-rw-r--r--rdoff/collectn.c40
-rw-r--r--rdoff/collectn.h22
-rw-r--r--rdoff/ldrdf.c540
-rw-r--r--rdoff/rdf.doc99
-rw-r--r--rdoff/rdfdump.c156
-rw-r--r--rdoff/rdfload.c173
-rw-r--r--rdoff/rdfload.h29
-rw-r--r--rdoff/rdoff.c367
-rw-r--r--rdoff/rdoff.h112
-rw-r--r--rdoff/rdx.c61
-rw-r--r--rdoff/symtab.c80
-rw-r--r--rdoff/symtab.h22
-rw-r--r--sync.c84
-rw-r--r--sync.h16
-rw-r--r--test/Makefile2
-rw-r--r--test/aouttest.asm83
-rw-r--r--test/aouttest.c35
-rw-r--r--test/bintest.asm56
-rw-r--r--test/cofftest.asm82
-rw-r--r--test/cofftest.c34
-rw-r--r--test/elftest.asm83
-rw-r--r--test/elftest.c35
-rw-r--r--test/inc1.asm4
-rw-r--r--test/inc2.asm8
-rw-r--r--test/inctest.asm15
-rw-r--r--test/objlink.c30
-rw-r--r--test/objtest.asm82
72 files changed, 16938 insertions, 0 deletions
diff --git a/Licence b/Licence
new file mode 100644
index 0000000..8cd2f6d
--- /dev/null
+++ b/Licence
@@ -0,0 +1,75 @@
+Terms and Conditions for the use of the Netwide Assembler
+=========================================================
+
+Can I have the gist without reading the legalese?
+-------------------------------------------------
+
+Basically, NASM is free. You can't charge for it. You can copy it as
+much as you like. You can incorporate it, or bits of it, into other
+free programs if you want. (But we want to know about it if you do,
+and we want to be mentioned in the credits.) We may well allow you
+to incorporate it into commercial software too, but we'll probably
+demand some money for it, and we'll certainly demand to be given
+credit. And in extreme cases (although I can't immediately think of
+a reason we might actually want to do this) we may refuse to let you
+do it at all.
+
+NASM LICENCE AGREEMENT
+======================
+
+By "the Software" this licence refers to the complete contents of
+the NASM archive, excluding this licence document itself, and
+excluding the contents of the `test' directory. The Netwide
+Disassembler, NDISASM, is specifically included under this licence.
+
+I. The Software is freely redistributable; anyone may copy the
+Software, or parts of the Software, and give away as many copies as
+they like to anyone, as long as this licence document is kept with
+the Software. Charging a fee for the Software is prohibited,
+although a fee may be charged for the act of transferring a copy,
+and you can offer warranty protection and charge a fee for that.
+
+II. The Software, or parts thereof, may be incorporated into other
+freely redistributable software (by which we mean software that may
+be obtained free of charge) without requiring permission from the
+authors, as long as due credit is given to the authors of the
+Software in the resulting work, as long as the authors are informed
+of this action, and as long as those parts of the Software that are
+used remain under this licence.
+
+III. The Software, or parts thereof, may be incorporated into other
+software which is not freely redistributable (i.e. software for
+which a fee is charged), as long as permission is granted from the
+authors of the Software. The authors reserve the right to grant this
+permission only for a fee, which may at our option take the form of
+royalty payments. The authors also reserve the right to refuse to
+grant permission if they deem it necessary.
+
+IV. You may not copy, modify or distribute the Software except under
+the terms given in this licence document. You may not sublicense the
+Software or in any way place it under any other licence than this
+one. Since you have not signed this licence, you are not of course
+required to accept it; however, no other licence applies to the
+Software, and nothing else grants you any permission to copy,
+modify, sublicense or distribute the Software in any way. These
+actions are therefore prohibited if you do not accept this licence.
+
+V. There is no warranty for the Software, to the extent permitted by
+applicable law. The authors provide the Software "as is" without
+warranty of any kind, either expressed or implied, including but not
+limited to the implied warranties of merchantability and fitness for
+a particular purpose. The entire risk as to the quality and
+performance of the Software is with you. Should the Software prove
+defective, you assume the cost of all necessary servicing, repair or
+correction.
+
+VI. In no event, unless required by applicable law or agreed to in
+writing, will any of the authors be liable to you for damages,
+including any general, special, incidental or consequential damages,
+arising out of the use or the inability to use the Software,
+including but not limited to loss of data or data being rendered
+inaccurate or a failure of the Software to operate with any other
+programs, even if you have been advised of the possibility of such
+damages.
+
+END OF LICENCE AGREEMENT
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..605ab41
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,93 @@
+# Makefile for the Netwide Assembler
+#
+# The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+# Julian Hall. All rights reserved. The software is
+# redistributable under the licence given in the file "Licence"
+# distributed in the NASM archive.
+#
+# This Makefile is designed for use under Unix (probably fairly
+# portably). It can also be used without change to build NASM using
+# DJGPP. The makefile "Makefile.dos" can be used to build NASM using
+# a 16-bit DOS C compiler such as Microsoft C.
+#
+# The `make dist' section at the end of the Makefile is not
+# guaranteed to work anywhere except Linux. Come to think of it,
+# I'm not sure I want to guarantee it to work anywhere except on
+# _my_ computer. :-)
+
+CC = gcc
+CCFLAGS = -c -g -O -Wall -ansi -pedantic
+LINK = gcc
+LINKFLAGS = -o nasm
+DLINKFLAGS = -o ndisasm
+LIBRARIES =
+STRIP = strip
+EXE =#
+OBJ = o#
+
+.c.$(OBJ):
+ $(CC) $(CCFLAGS) $*.c
+
+NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \
+ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \
+ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \
+ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ)
+
+NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \
+ insnsd.$(OBJ)
+
+all : nasm$(EXE) ndisasm$(EXE)
+
+nasm$(EXE): $(NASMOBJS)
+ $(LINK) $(LINKFLAGS) $(NASMOBJS) $(LIBRARIES)
+
+ndisasm$(EXE): $(NDISASMOBJS)
+ $(LINK) $(DLINKFLAGS) $(NDISASMOBJS) $(LIBRARIES)
+
+# Header dependencies. Every object named in NASMOBJS or NDISASMOBJS
+# needs an entry here, otherwise a change to one of its headers will
+# not cause it to be rebuilt.
+assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h
+disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c
+float.$(OBJ): float.c nasm.h
+insnsa.$(OBJ): insnsa.c nasm.h insns.h
+insnsd.$(OBJ): insnsd.c nasm.h insns.h
+labels.$(OBJ): labels.c nasm.h nasmlib.h
+nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h
+nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h
+ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h
+outas86.$(OBJ): outas86.c nasm.h nasmlib.h
+outaout.$(OBJ): outaout.c nasm.h nasmlib.h
+outbin.$(OBJ): outbin.c nasm.h nasmlib.h
+outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h
+outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h
+outelf.$(OBJ): outelf.c nasm.h nasmlib.h
+outobj.$(OBJ): outobj.c nasm.h nasmlib.h
+outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h
+outform.$(OBJ): outform.c outform.h nasm.h
+parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c
+sync.$(OBJ): sync.c sync.h
+
+# These two source files are automagically generated from a single
+# instruction-table file by a Perl script. They're distributed,
+# though, so it isn't necessary to have Perl just to recompile NASM
+# from the distribution.
+
+AUTOSRCS = insnsa.c insnsd.c
+$(AUTOSRCS): insns.dat insns.pl
+ perl insns.pl
+
+# Remove the build products in this directory, then recurse into the
+# subdirectories. $(MAKE) is used rather than a literal `make' so the
+# sub-makes run the same make program that is running this Makefile.
+clean :
+	rm -f $(NASMOBJS) $(NDISASMOBJS) nasm$(EXE) ndisasm$(EXE)
+	$(MAKE) -C rdoff clean
+	$(MAKE) -C test clean
+
+# Here the `make dist' section begins. Nothing is guaranteed hereafter
+# unless you're using the Makefile under Linux, running bash, with
+# gzip, GNU tar and a sensible version of zip readily available.
+
+DOSEXES = nasm.exe ndisasm.exe
+MANPAGES = nasm.man ndisasm.man
+
+.SUFFIXES: .man .1
+
+.1.man:
+ -man ./$< | ul > $@
+
+dist: $(AUTOSRCS) $(MANPAGES) $(DOSEXES) clean
+ makedist.sh
diff --git a/Makefile.bor b/Makefile.bor
new file mode 100644
index 0000000..75aed4f
--- /dev/null
+++ b/Makefile.bor
@@ -0,0 +1,76 @@
+# Makefile for the Netwide Assembler under 16-bit DOS
+#
+# The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+# Julian Hall. All rights reserved. The software is
+# redistributable under the licence given in the file "Licence"
+# distributed in the NASM archive.
+#
+# This Makefile is designed to build NASM using a 16-bit DOS C
+# compiler such as Borland C, and has been tested with Borland C 2.3
+# and Borland Make.
+
+# CC = cl
+# CCFLAGS = /c /O /AL
+# LINK = cl
+CC = bcc
+CCFLAGS = -c -O -ml -A
+LINK = tlink /c /Lc:\bc\lib
+LINKFLAGS =
+LIBRARIES =
+EXE = .exe#
+OBJ = obj#
+
+.c.$(OBJ):
+ $(CC) $(CCFLAGS) $*.c
+
+NASMOBJS1 = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ)
+NASMOBJS2 = assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ)
+NASMOBJS3 = outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ)
+NASMOBJS4 = outobj.$(OBJ) outas86.$(OBJ) outdbg.$(OBJ) outrdf.$(OBJ)
+
+NASMOBJS = $(NASMOBJS1) $(NASMOBJS2) $(NASMOBJS3) $(NASMOBJS4)
+
+NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \
+ insnsd.$(OBJ)
+
+all : nasm$(EXE) ndisasm$(EXE)
+
+# We have to have a horrible kludge here to get round the 128 character
+# limit, as usual...
+nasm$(EXE): $(NASMOBJS)
+# $(LINK) /Fenasm.exe a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj
+ echo c0l.obj $(NASMOBJS1) +> nasmobjs.tmp
+ echo $(NASMOBJS2) +>> nasmobjs.tmp
+ echo $(NASMOBJS3) +>> nasmobjs.tmp
+ echo $(NASMOBJS4),nasm.exe,,cl.lib, >> nasmobjs.tmp
+ $(LINK) /Tde @nasmobjs.tmp
+
+ndisasm$(EXE): $(NDISASMOBJS)
+# $(LINK) /Fendisasm.exe $(NDISASMOBJS)
+ $(LINK) /Tde $(NDISASMOBJS),ndisasm.exe,,cl.lib,
+
+assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h
+disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c
+float.$(OBJ): float.c nasm.h
+insnsa.$(OBJ): insnsa.c nasm.h insns.h
+insnsd.$(OBJ): insnsd.c nasm.h insns.h
+labels.$(OBJ): labels.c nasm.h nasmlib.h
+nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h
+nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h
+ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h
+outas86.$(OBJ): outas86.c nasm.h nasmlib.h
+outaout.$(OBJ): outaout.c nasm.h nasmlib.h
+outbin.$(OBJ): outbin.c nasm.h nasmlib.h
+outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h
+outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h
+outelf.$(OBJ): outelf.c nasm.h nasmlib.h
+outobj.$(OBJ): outobj.c nasm.h nasmlib.h
+outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h
+outform.$(OBJ): outform.c outform.h nasm.h
+parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c
+sync.$(OBJ): sync.c sync.h
+
+clean :
+ del *.obj
+ del nasm$(EXE)
+ del ndisasm$(EXE)
diff --git a/Makefile.dos b/Makefile.dos
new file mode 100644
index 0000000..cb75708
--- /dev/null
+++ b/Makefile.dos
@@ -0,0 +1,72 @@
+# Makefile for the Netwide Assembler under 16-bit DOS
+#
+# The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+# Julian Hall. All rights reserved. The software is
+# redistributable under the licence given in the file "Licence"
+# distributed in the NASM archive.
+#
+# This Makefile is designed to build NASM using a 16-bit DOS C
+# compiler such as Microsoft C, provided you have a compatible MAKE.
+# It's been tested with Microsoft C 5.x plus Borland Make. (Yes, I
+# know it's silly, but...)
+
+CC = cl
+CCFLAGS = /c /O /AL
+LINK = cl
+LINKFLAGS =
+LIBRARIES =
+EXE = .exe#
+OBJ = obj#
+
+.c.$(OBJ):
+ $(CC) $(CCFLAGS) $*.c
+
+NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \
+ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \
+ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \
+ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ)
+
+NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \
+ insnsd.$(OBJ)
+
+all : nasm$(EXE) ndisasm$(EXE)
+
+# We have to have a horrible kludge here to get round the 128 character
+# limit, as usual...
+nasm$(EXE): $(NASMOBJS)
+ cl /Fenasm.exe a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj
+
+ndisasm$(EXE): $(NDISASMOBJS)
+ cl /Fendisasm.exe $(NDISASMOBJS)
+
+assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h
+disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c
+float.$(OBJ): float.c nasm.h
+labels.$(OBJ): labels.c nasm.h nasmlib.h
+nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h
+nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h
+ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h
+outas86.$(OBJ): outas86.c nasm.h nasmlib.h
+outaout.$(OBJ): outaout.c nasm.h nasmlib.h
+outbin.$(OBJ): outbin.c nasm.h nasmlib.h
+outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h
+outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h
+outelf.$(OBJ): outelf.c nasm.h nasmlib.h
+outobj.$(OBJ): outobj.c nasm.h nasmlib.h
+outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h
+outform.$(OBJ): outform.c outform.h nasm.h
+parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c
+sync.$(OBJ): sync.c sync.h
+
+# Another grotty hack: QC is less likely to run out of memory than
+# CL proper; and we don't need any optimisation in these modules
+# since they're just data.
+insnsa.$(OBJ): insnsa.c nasm.h insns.h
+ qcl /c /AL insnsa.c
+insnsd.$(OBJ): insnsd.c nasm.h insns.h
+ qcl /c /AL insnsd.c
+
+clean :
+ del *.obj
+ del nasm$(EXE)
+ del ndisasm$(EXE)
diff --git a/Readme b/Readme
new file mode 100644
index 0000000..5a036fe
--- /dev/null
+++ b/Readme
@@ -0,0 +1,54 @@
+This is a distribution of NASM, the Netwide Assembler. NASM is a
+prototype general-purpose x86 assembler. It will currently output
+flat-form binary files, a.out, COFF and ELF Unix object files,
+Microsoft 16-bit DOS and Win32 object files, the as86 object format,
+and a home-grown format called RDF.
+
+Also included is NDISASM, a prototype x86 binary-file disassembler
+which uses the same instruction table as NASM.
+
+To install NASM, you will need GCC. Type `make', and then when it
+has finished copy the file `nasm' (and maybe `ndisasm') to a
+directory on your search path (I use /usr/local/bin on my linux
+machine at home, and ~/bin on other machines where I don't have root
+access). You may also want to copy the man page `nasm.1' (and maybe
+`ndisasm.1') to somewhere sensible.
+
+If you want to build a restricted version of NASM containing only
+some of the object file formats, you can achieve this by adding
+#defines to `outform.h' (see the file itself for documentation), or
+equivalently by adding compiler command line options in the
+Makefile.
+
+There is a machine description file for the `LCC' retargetable C
+compiler, in the directory `lcc', along with instructions for its
+use. This means that NASM can now be used as the code-generator back
+end for a useful C compiler.
+
+Michael `Wuschel' Tippach has ported his DOS extender `WDOSX' to
+enable it to work with the 32-bit binary files NASM can output: the
+original extender and his port `WDOSX/N' are available from his web
+page, http://www.geocities.com/SiliconValley/Park/4493.
+
+The `misc' directory contains `nasm.sl', a NASM editing mode for the
+JED programmers' editor (see http://space.mit.edu/~davis/jed.html
+for details about JED). The comment at the start of the file gives
+instructions on how to install the mode. This directory also
+contains a file (`magic') containing lines to add to /etc/magic on
+Unix systems to allow the `file' command to recognise RDF files.
+
+The `rdoff' directory contains sources for a linker and loader for
+the RDF object file format, to run under Linux, and also
+documentation on the internal structure of RDF files.
+
+For information about how you can distribute and use NASM, see the
+file Licence. We were tempted to put NASM under the GPL, but decided
+that in many ways it was too restrictive for developers.
+
+For information about how to use NASM, see `nasm.doc'. For
+information about how to use NDISASM, see `ndisasm.doc'. For
+information about the internal structure of NASM, see
+`internal.doc'.
+
+Bug reports (and patches if you can) should be sent to
+jules@dcs.warwick.ac.uk or anakin@pobox.com.
diff --git a/assemble.c b/assemble.c
new file mode 100644
index 0000000..bab6f29
--- /dev/null
+++ b/assemble.c
@@ -0,0 +1,945 @@
+/* assemble.c code generation for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * the actual codes (C syntax, i.e. octal):
+ * \0 - terminates the code. (Unless it's a literal of course.)
+ * \1, \2, \3 - that many literal bytes follow in the code stream
+ * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
+ * (POP is never used for CS) depending on operand 0
+ * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
+ * on operand 0
+ * \10, \11, \12 - a literal byte follows in the code stream, to be added
+ * to the register value of operand 0, 1 or 2
+ * \17 - encodes the literal byte 0. (Some compilers don't take
+ * kindly to a zero byte in the _middle_ of a compile time
+ * string constant, so I had to put this hack in.)
+ * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
+ * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
+ * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
+ * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
+ * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
+ * assembly mode or the address-size override on the operand
+ * \37 - a word constant, from the _segment_ part of operand 0
+ * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
+ * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
+ * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
+ * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
+ * assembly mode or the address-size override on the operand
+ * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
+ * \1ab - a ModRM, calculated on EA in operand a, with the spare
+ * field the register value of operand b.
+ * \2ab - a ModRM, calculated on EA in operand a, with the spare
+ * field equal to digit b.
+ * \30x - might be an 0x67 byte, depending on the address size of
+ * the memory reference in operand x.
+ * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
+ * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
+ * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
+ * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
+ * \322 - indicates that this instruction is only valid when the
+ * operand size is the default (instruction to disassembler,
+ * generates no code in the assembler)
+ * \330 - a literal byte follows in the code stream, to be added
+ * to the condition code value of the instruction.
+ * \340, \341, \342 - reserve <operand 0> bytes, words or doublewords
+ * (respectively) of uninitialised storage.
+ * Operand 0 had better be a segmentless constant.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "nasm.h"
+#include "assemble.h"
+#include "insns.h"
+
+extern struct itemplate *nasm_instructions[];
+
+/*
+ * Description of an encoded effective address. calcsize() passes one
+ * of these to process_ea() to be filled in and then reads `size' to
+ * learn how many bytes the ModRM/SIB/offset group occupies.
+ */
+typedef struct {
+ int sib_present; /* is a SIB byte necessary? */
+ int bytes; /* # of bytes of offset needed */
+ int size; /* lazy - this is sib+bytes+1 */
+ unsigned char modrm, sib; /* the bytes themselves */
+} ea;
+
+static efunc errfunc;
+static struct ofmt *outfmt;
+
+static long calcsize (long, long, int, insn *, char *);
+static void gencode (long, long, int, insn *, char *, long);
+static int regval (operand *o);
+static int matches (struct itemplate *, insn *);
+static ea *process_ea (operand *, ea *, int, int);
+static int chsize (operand *, int);
+
+/*
+ * assemble(): generate the output bytes for one parsed source line.
+ *
+ * `segment' and `offset' give the location being assembled at, `bits'
+ * is the current 16/32-bit mode, `output' is the object-format driver
+ * which receives the bytes, and `error' is the error-reporting
+ * routine. The last two are also stored in file-level statics so that
+ * the helper functions in this file can reach them.
+ *
+ * Returns the number of bytes by which `offset' advanced. Zero is
+ * returned for a line carrying no instruction (opcode -1), and also
+ * when no instruction template matches, in which case an error has
+ * been reported.
+ */
+long assemble (long segment, long offset, int bits,
+               insn *instruction, struct ofmt *output, efunc error) {
+    int j, itimes, size_prob;
+    long insn_end;
+    long start = offset;
+    struct itemplate *temp;
+
+    errfunc = error;                   /* to pass to other functions */
+    outfmt = output;                   /* likewise */
+
+    if (instruction->opcode == -1)
+        return 0;
+
+    if (instruction->opcode == I_DB ||
+        instruction->opcode == I_DW ||
+        instruction->opcode == I_DD ||
+        instruction->opcode == I_DQ ||
+        instruction->opcode == I_DT) {
+        /*
+         * Zero bytes used to pad a string operand up to a multiple of
+         * the element size. The largest element is DT (10 bytes), so
+         * at most 9 bytes of padding are ever needed; static storage
+         * guarantees the array is all zeroes.
+         */
+        static char zero_pad[10];
+        extop *e;
+        long wsize = 0;                /* placate gcc */
+        int t = instruction->times;
+
+        switch (instruction->opcode) {
+          case I_DB: wsize = 1; break;
+          case I_DW: wsize = 2; break;
+          case I_DD: wsize = 4; break;
+          case I_DQ: wsize = 8; break;
+          case I_DT: wsize = 10; break;
+        }
+
+        while (t--) {
+            for (e = instruction->eops; e; e = e->next) {
+                if (e->type == EOT_DB_NUMBER) {
+                    if (wsize == 1) {
+                        if (e->segment != NO_SEG)
+                            errfunc (ERR_NONFATAL,
+                                     "one-byte relocation attempted");
+                        else {
+                            unsigned char c = e->offset;
+                            outfmt->output (segment, &c, OUT_RAWDATA+1,
+                                            NO_SEG, NO_SEG);
+                        }
+                    } else if (wsize > 5) {
+                        errfunc (ERR_NONFATAL, "integer supplied to a D%c"
+                                 " instruction", wsize==8 ? 'Q' : 'T');
+                    } else
+                        outfmt->output (segment, &e->offset,
+                                        OUT_ADDRESS+wsize, e->segment,
+                                        e->wrt);
+                    offset += wsize;
+                } else if (e->type == EOT_DB_STRING) {
+                    int align;
+
+                    /* bytes needed to round the string up to wsize */
+                    align = (-e->stringlen) % wsize;
+                    if (align < 0)
+                        align += wsize;
+                    outfmt->output (segment, e->stringval,
+                                    OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
+                    if (align)
+                        outfmt->output (segment, zero_pad,
+                                        OUT_RAWDATA+align, NO_SEG, NO_SEG);
+                    offset += e->stringlen + align;
+                }
+            }
+        }
+        return offset - start;
+    }
+
+    size_prob = FALSE;
+    temp = nasm_instructions[instruction->opcode];
+    while (temp->opcode != -1) {
+        int m = matches (temp, instruction);
+        if (m == 100) {                /* matches! */
+            char *codes = temp->code;
+            long isize = calcsize(segment, offset, bits,
+                                  instruction, codes);
+            itimes = instruction->times;
+            if (isize < 0)             /* shouldn't be, on pass two */
+                error (ERR_PANIC, "errors made it through from pass one");
+            else while (itimes--) {
+                for (j=0; j<instruction->nprefix; j++) {
+                    unsigned char c = 0;       /* 0 means `emit nothing' */
+                    switch (instruction->prefixes[j]) {
+                      case P_LOCK:
+                        c = 0xF0; break;
+                      case P_REPNE: case P_REPNZ:
+                        c = 0xF2; break;
+                      case P_REPE: case P_REPZ: case P_REP:
+                        c = 0xF3; break;
+                      case R_CS: c = 0x2E; break;
+                      case R_DS: c = 0x3E; break;
+                      case R_ES: c = 0x26; break;
+                      case R_FS: c = 0x64; break;
+                      case R_GS: c = 0x65; break;
+                      case R_SS: c = 0x36; break;
+                      case P_A16:
+                        if (bits != 16)
+                            c = 0x67;
+                        break;
+                      case P_A32:
+                        if (bits != 32)
+                            c = 0x67;
+                        break;
+                      case P_O16:
+                        if (bits != 16)
+                            c = 0x66;
+                        break;
+                      case P_O32:
+                        if (bits != 32)
+                            c = 0x66;
+                        break;
+                      default:
+                        error (ERR_PANIC,
+                               "invalid instruction prefix");
+                    }
+                    /*
+                     * Only advance the offset for a prefix byte that
+                     * was really emitted: a size prefix matching the
+                     * current mode generates nothing, and insn_size()
+                     * does not count it either.
+                     */
+                    if (c != 0) {
+                        outfmt->output (segment, &c, OUT_RAWDATA+1,
+                                        NO_SEG, NO_SEG);
+                        offset++;
+                    }
+                }
+                /*
+                 * calcsize() does not include the prefixes, so the end
+                 * of the instruction can only be computed once they
+                 * have been accounted for in `offset'.
+                 */
+                insn_end = offset + isize;
+                gencode (segment, offset, bits, instruction, codes, insn_end);
+                offset += isize;
+            }
+            return offset - start;
+        } else if (m > 0) {
+            size_prob = m;
+        }
+        temp++;
+    }
+    if (temp->opcode == -1) {          /* didn't match any instruction */
+        if (size_prob == 1)            /* would have matched, but for size */
+            error (ERR_NONFATAL, "operation size not specified");
+        else if (size_prob == 2)
+            error (ERR_NONFATAL, "mismatch in operand sizes");
+        else
+            error (ERR_NONFATAL,
+                   "invalid combination of opcode and operands");
+    }
+    return 0;
+}
+
+/*
+ * insn_size(): work out how many bytes `instruction' will occupy
+ * without generating any output.
+ *
+ * Returns the total size including the effect of the repeat count,
+ * 0 for a line carrying no instruction (opcode -1), or -1 if no
+ * instruction template matches or calcsize() reports a failure.
+ */
+long insn_size (long segment, long offset, int bits,
+                insn *instruction, efunc error) {
+    struct itemplate *tmpl;
+    long wsize;
+
+    errfunc = error;                   /* to pass to other functions */
+
+    if (instruction->opcode == -1)
+        return 0;
+
+    /* Element size for the data directives; zero for anything else. */
+    switch (instruction->opcode) {
+      case I_DB: wsize = 1; break;
+      case I_DW: wsize = 2; break;
+      case I_DD: wsize = 4; break;
+      case I_DQ: wsize = 8; break;
+      case I_DT: wsize = 10; break;
+      default:   wsize = 0; break;
+    }
+
+    if (wsize) {
+        long total = 0;
+        extop *e;
+
+        for (e = instruction->eops; e; e = e->next) {
+            long len, pad;
+
+            if (e->type == EOT_DB_NUMBER)
+                len = 1;
+            else if (e->type == EOT_DB_STRING)
+                len = e->stringlen;
+            else
+                len = 0;
+
+            /* round each operand up to a whole number of elements */
+            pad = (-len) % wsize;
+            if (pad < 0)
+                pad += wsize;
+            total += len + pad;
+        }
+        return total * instruction->times;
+    }
+
+    for (tmpl = nasm_instructions[instruction->opcode];
+         tmpl->opcode != -1; tmpl++) {
+        long isize;
+        int j;
+
+        if (matches(tmpl, instruction) != 100)
+            continue;
+
+        /* we've matched an instruction. */
+        isize = calcsize(segment, offset, bits, instruction, tmpl->code);
+        if (isize < 0)
+            return -1;
+
+        /*
+         * Each prefix costs one byte, except a size prefix which
+         * merely restates the current mode.
+         */
+        for (j = 0; j < instruction->nprefix; j++) {
+            switch (instruction->prefixes[j]) {
+              case P_A16: case P_O16:
+                if (bits == 32)
+                    isize++;
+                break;
+              case P_A32: case P_O32:
+                if (bits == 16)
+                    isize++;
+                break;
+              default:
+                if (bits == 16 || bits == 32)
+                    isize++;
+                break;
+            }
+        }
+        return isize * instruction->times;
+    }
+    return -1;                         /* didn't match any instruction */
+}
+
+/*
+ * calcsize(): walk the code string `codes' of an instruction template
+ * and add up the number of bytes it will generate for `ins' in the
+ * given `bits' mode. Prefixes are not included.
+ *
+ * Returns the length, or -1 if an effective address in the
+ * instruction cannot be encoded (an error is reported first).
+ */
+static long calcsize (long segment, long offset, int bits,
+                      insn *ins, char *codes) {
+    long total = 0;
+    unsigned char op;
+
+    while (*codes) {
+        op = *codes++;
+        switch (op) {
+          /* that many literal bytes follow in the code string */
+          case 01: case 02: case 03:
+            codes += op;
+            total += op;
+            break;
+
+          /* one output byte built from a byte in the code string */
+          case 010: case 011: case 012:
+          case 0330:
+            codes++;
+            total++;
+            break;
+
+          /* exactly one output byte */
+          case 04: case 05: case 06: case 07:
+          case 014: case 015: case 016:
+          case 017:
+          case 020: case 021: case 022:
+          case 024: case 025: case 026:
+          case 050: case 051: case 052:
+            total++;
+            break;
+
+          /* exactly two output bytes */
+          case 030: case 031: case 032:
+          case 037:
+          case 060: case 061: case 062:
+            total += 2;
+            break;
+
+          /* exactly four output bytes */
+          case 040: case 041: case 042:
+          case 070: case 071: case 072:
+            total += 4;
+            break;
+
+          /*
+           * Two or four bytes, chosen by the operand's address-size
+           * override if it has one and by `bits' otherwise. The low
+           * two bits of the code are the operand number.
+           */
+          case 034: case 035: case 036:
+          case 064: case 065: case 066:
+            if ((ins->oprs[op & 3].addr_size ?
+                 ins->oprs[op & 3].addr_size : bits) == 16)
+                total += 2;
+            else
+                total += 4;
+            break;
+
+          case 0300: case 0301: case 0302:
+            total += chsize (&ins->oprs[op-0300], bits);
+            break;
+
+          /* fixed 16-bit size: costs a prefix byte in 32-bit mode */
+          case 0310: case 0320:
+            if (bits == 32)
+                total++;
+            break;
+
+          /* fixed 32-bit size: costs a prefix byte in 16-bit mode */
+          case 0311: case 0321:
+            if (bits == 16)
+                total++;
+            break;
+
+          /* generate nothing */
+          case 0312: case 0322:
+            break;
+
+          case 0340: case 0341: case 0342:
+            if (ins->oprs[0].segment != NO_SEG)
+                errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
+                         " quantity of BSS space");
+            else
+                total += ins->oprs[0].offset << (op-0340);
+            break;
+
+          default:                     /* can't do it by 'case' statements */
+            if (op < 0100 || op > 0277) {
+                errfunc (ERR_PANIC, "internal instruction table corrupt"
+                         ": instruction code 0x%02X given", op);
+            } else {                   /* it's an EA */
+                ea ea_data;
+
+                if (!process_ea (&ins->oprs[(op>>3)&7], &ea_data, bits, 0)) {
+                    errfunc (ERR_NONFATAL, "invalid effective address");
+                    return -1;
+                }
+                total += ea_data.size;
+            }
+            break;
+        }
+    }
+    return total;
+}
+
+static void gencode (long segment, long offset, int bits,
+ insn *ins, char *codes, long insn_end) {
+ static char condval[] = { /* conditional opcodes */
+ 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
+ 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
+ 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
+ };
+ unsigned char c, bytes[4];
+ long data, size;
+
+ while (*codes) switch (c = *codes++) {
+ case 01: case 02: case 03:
+ outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
+ codes += c;
+ offset += c;
+ break;
+ case 04: case 06:
+ switch (ins->oprs[0].basereg) {
+ case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
+ case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
+ case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
+ case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
+ default:
+ errfunc (ERR_PANIC, "bizarre 8086 segment register received");
+ }
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset++;
+ break;
+ case 05: case 07:
+ switch (ins->oprs[0].basereg) {
+ case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
+ case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
+ default:
+ errfunc (ERR_PANIC, "bizarre 386 segment register received");
+ }
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset++;
+ break;
+ case 010: case 011: case 012:
+ bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 017:
+ bytes[0] = 0;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 014: case 015: case 016:
+ if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127)
+ errfunc (ERR_WARNING, "signed byte value exceeds bounds");
+ bytes[0] = ins->oprs[c-014].offset;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 020: case 021: case 022:
+ if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255)
+ errfunc (ERR_WARNING, "byte value exceeds bounds");
+ bytes[0] = ins->oprs[c-020].offset;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 024: case 025: case 026:
+ if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
+ errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
+ bytes[0] = ins->oprs[c-024].offset;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 030: case 031: case 032:
+ if (ins->oprs[c-030].segment == NO_SEG &&
+ ins->oprs[c-030].wrt == NO_SEG &&
+ (ins->oprs[c-030].offset < -32768 ||
+ ins->oprs[c-030].offset > 65535))
+ errfunc (ERR_WARNING, "word value exceeds bounds");
+ data = ins->oprs[c-030].offset;
+ outfmt->output (segment, &data, OUT_ADDRESS+2,
+ ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
+ offset += 2;
+ break;
+ case 034: case 035: case 036:
+ data = ins->oprs[c-034].offset;
+ size = ((ins->oprs[c-034].addr_size ?
+ ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
+ if (size==16 && (data < -32768 || data > 65535))
+ errfunc (ERR_WARNING, "word value exceeds bounds");
+ outfmt->output (segment, &data, OUT_ADDRESS+size,
+ ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
+ offset += size;
+ break;
+ case 037:
+ if (ins->oprs[0].segment == NO_SEG)
+ errfunc (ERR_NONFATAL, "value referenced by FAR is not"
+ " relocatable");
+ data = 0L;
+ outfmt->output (segment, &data, OUT_ADDRESS+2,
+ outfmt->segbase(1+ins->oprs[0].segment),
+ ins->oprs[0].wrt);
+ offset += 2;
+ break;
+ case 040: case 041: case 042:
+ data = ins->oprs[c-040].offset;
+ outfmt->output (segment, &data, OUT_ADDRESS+4,
+ ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
+ offset += 4;
+ break;
+ case 050: case 051: case 052:
+ if (ins->oprs[c-050].segment != segment)
+ errfunc (ERR_NONFATAL, "short relative jump outside segment");
+ data = ins->oprs[c-050].offset - insn_end;
+ if (data > 127 || data < -128)
+ errfunc (ERR_NONFATAL, "short jump is out of range");
+ bytes[0] = data;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 060: case 061: case 062:
+ if (ins->oprs[c-060].segment != segment) {
+ data = ins->oprs[c-060].offset;
+ outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset,
+ ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
+ } else {
+ data = ins->oprs[c-060].offset - insn_end;
+ outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG);
+ }
+ offset += 2;
+ break;
+ case 064: case 065: case 066:
+ size = ((ins->oprs[c-064].addr_size ?
+ ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
+ if (ins->oprs[c-064].segment != segment) {
+ data = ins->oprs[c-064].offset;
+ size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
+ outfmt->output (segment, &data, size+insn_end-offset,
+ ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
+ size = (bits == 16 ? 2 : 4);
+ } else {
+ data = ins->oprs[c-064].offset - insn_end;
+ outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG);
+ }
+ offset += size;
+ break;
+ case 070: case 071: case 072:
+ if (ins->oprs[c-070].segment != segment) {
+ data = ins->oprs[c-070].offset;
+ outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset,
+ ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
+ } else {
+ data = ins->oprs[c-070].offset - insn_end;
+ outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG);
+ }
+ offset += 4;
+ break;
+ case 0300: case 0301: case 0302:
+ if (chsize (&ins->oprs[c-0300], bits)) {
+ *bytes = 0x67;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ } else
+ offset += 0;
+ break;
+ case 0310:
+ if (bits==32) {
+ *bytes = 0x67;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ } else
+ offset += 0;
+ break;
+ case 0311:
+ if (bits==16) {
+ *bytes = 0x67;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ } else
+ offset += 0;
+ break;
+ case 0312:
+ break;
+ case 0320:
+ if (bits==32) {
+ *bytes = 0x66;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ } else
+ offset += 0;
+ break;
+ case 0321:
+ if (bits==16) {
+ *bytes = 0x66;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ } else
+ offset += 0;
+ break;
+ case 0322:
+ break;
+ case 0330:
+ *bytes = *codes++ + condval[ins->condition];
+ outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+ case 0340: case 0341: case 0342:
+ if (ins->oprs[0].segment != NO_SEG)
+ errfunc (ERR_PANIC, "non-constant BSS size in pass two");
+ else {
+ long size = ins->oprs[0].offset << (c-0340);
+ outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, NO_SEG);
+ offset += size;
+ }
+ break;
+ default: /* can't do it by 'case' statements */
+ if (c>=0100 && c<=0277) { /* it's an EA */
+ ea ea_data;
+ int rfield;
+ unsigned char *p;
+ long s;
+
+ if (c<=0177) /* pick rfield from operand b */
+ rfield = regval (&ins->oprs[c&7]);
+ else /* rfield is constant */
+ rfield = c & 7;
+ if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield))
+ errfunc (ERR_NONFATAL, "invalid effective address");
+
+ p = bytes;
+ *p++ = ea_data.modrm;
+ if (ea_data.sib_present)
+ *p++ = ea_data.sib;
+ /*
+ * the cast in the next line is to placate MS C...
+ */
+ outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes),
+ NO_SEG, NO_SEG);
+ s = p-bytes;
+
+ switch (ea_data.bytes) {
+ case 0:
+ break;
+ case 1:
+ *bytes = ins->oprs[(c>>3)&7].offset;
+ outfmt->output (segment, bytes, OUT_RAWDATA+1,
+ NO_SEG, NO_SEG);
+ s++;
+ break;
+ case 2:
+ case 4:
+ data = ins->oprs[(c>>3)&7].offset;
+ outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes,
+ ins->oprs[(c>>3)&7].segment,
+ ins->oprs[(c>>3)&7].wrt);
+ s += ea_data.bytes;
+ break;
+ }
+ offset += s;
+ } else
+ errfunc (ERR_PANIC, "internal instruction table corrupt"
+ ": instruction code 0x%02X given", c);
+ }
+}
+
+ /*
+ * Return the register number (0-7) of the register named by
+ * o->basereg. Registers of every class handled here (general
+ * purpose, segment, control, debug, test, FPU and MMX) share the
+ * same numbering, so one flat table covers them all. A register
+ * token that is not in the table is reported through errfunc() as
+ * ERR_PANIC, and 0 is returned should errfunc() come back.
+ */
+ static int regval (operand *o) {
+     static const struct {
+         int reg;                       /* R_xxx register token */
+         int code;                      /* number returned for it */
+     } encodings[] = {
+         {R_EAX, 0}, {R_AX, 0}, {R_AL, 0}, {R_ES, 0}, {R_CR0, 0}, {R_DR0, 0},
+         {R_ST0, 0}, {R_MM0, 0},
+         {R_ECX, 1}, {R_CX, 1}, {R_CL, 1}, {R_CS, 1}, {R_DR1, 1}, {R_ST1, 1},
+         {R_MM1, 1},
+         {R_EDX, 2}, {R_DX, 2}, {R_DL, 2}, {R_SS, 2}, {R_CR2, 2}, {R_DR2, 2},
+         {R_ST2, 2}, {R_MM2, 2},
+         {R_EBX, 3}, {R_BX, 3}, {R_BL, 3}, {R_DS, 3}, {R_CR3, 3}, {R_DR3, 3},
+         {R_TR3, 3}, {R_ST3, 3}, {R_MM3, 3},
+         {R_ESP, 4}, {R_SP, 4}, {R_AH, 4}, {R_FS, 4}, {R_CR4, 4}, {R_TR4, 4},
+         {R_ST4, 4}, {R_MM4, 4},
+         {R_EBP, 5}, {R_BP, 5}, {R_CH, 5}, {R_GS, 5}, {R_TR5, 5}, {R_ST5, 5},
+         {R_MM5, 5},
+         {R_ESI, 6}, {R_SI, 6}, {R_DH, 6}, {R_DR6, 6}, {R_TR6, 6}, {R_ST6, 6},
+         {R_MM6, 6},
+         {R_EDI, 7}, {R_DI, 7}, {R_BH, 7}, {R_DR7, 7}, {R_TR7, 7}, {R_ST7, 7},
+         {R_MM7, 7}
+     };
+     unsigned k;
+
+     for (k = 0; k < sizeof(encodings) / sizeof(encodings[0]); k++)
+         if (o->basereg == encodings[k].reg)
+             return encodings[k].code;
+
+     /* not a register we know how to number: panic */
+     errfunc (ERR_PANIC, "invalid register operand given to regval()");
+     return 0;
+ }
+
+ /*
+ * Decide how well the parsed instruction `instruction' fits the
+ * instruction template `itemp'.
+ *
+ * Return values, as produced by the code below:
+ * 0 - no match: the opcode or operand count differs, an operand
+ * carries a COLON or TO flag the template does not have, or
+ * an operand disagrees with the template in its non-size
+ * flags (or in a size the instruction stated explicitly);
+ * 1 - the only disagreement found is over operand size, on an
+ * operand for which the instruction gives no size at all;
+ * 2 - an operand the template leaves unsized carries a size
+ * other than the one implied by the template's IF_Sx flags
+ * (this overrides a 1 set earlier);
+ * 100 - nothing above was triggered.
+ * NOTE(review): how the caller ranks 1, 2 and 100 is not visible in
+ * this function - confirm against the call sites.
+ */
+ static int matches (struct itemplate *itemp, insn *instruction) {
+ int i, size, oprs, ret;
+
+ ret = 100;
+
+ /*
+ * Check the opcode
+ */
+ if (itemp->opcode != instruction->opcode) return 0;
+
+ /*
+ * Count the operands
+ */
+ if (itemp->operands != instruction->operands) return 0;
+
+ /*
+ * Check that no spurious colons or TOs are present
+ */
+ for (i=0; i<itemp->operands; i++)
+ if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
+ return 0;
+
+ /*
+ * Check that the operand flags all match up
+ */
+ for (i=0; i<itemp->operands; i++)
+ /*
+ * Outer test: the template wants a flag the operand lacks, or
+ * the template names a size and the two size fields differ.
+ */
+ if (itemp->opd[i] & ~instruction->oprs[i].type ||
+ ((itemp->opd[i] & SIZE_MASK) &&
+ ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
+ /*
+ * Inner test: a missing non-size flag, or any size stated on
+ * the operand, is fatal; otherwise only a size is missing
+ * and the match is downgraded to 1.
+ */
+ if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
+ (instruction->oprs[i].type & SIZE_MASK))
+ return 0;
+ else
+ ret = 1;
+ }
+
+ /*
+ * Check operand sizes
+ */
+ if (itemp->flags & IF_SB) {
+ size = BITS8;
+ oprs = itemp->operands;
+ } else if (itemp->flags & IF_SD) {
+ size = BITS32;
+ oprs = itemp->operands;
+ } else if (itemp->flags & (IF_SM | IF_SM2)) {
+ /*
+ * Take the size from the first template operand that has one;
+ * IF_SM2 restricts the search to the first two operands.
+ */
+ oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
+ size = 0; /* placate gcc */
+ for (i=0; i<oprs; i++)
+ if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
+ break;
+ } else {
+ size = 0;
+ oprs = itemp->operands;
+ }
+
+ /*
+ * Note that this pass walks all itemp->operands operands, not just
+ * the first `oprs' of them.
+ */
+ for (i=0; i<itemp->operands; i++)
+ if (!(itemp->opd[i] & SIZE_MASK) &&
+ (instruction->oprs[i].type & SIZE_MASK & ~size))
+ ret = 2;
+
+ return ret;
+ }
+
+ /*
+ * Work out the ModRM byte (and the SIB byte, where one is needed)
+ * that encodes the operand `input' as an effective address.
+ *
+ * input - the operand: either a single register or a memory
+ * reference built from basereg, indexreg, scale and offset
+ * output - filled in with modrm, sib, sib_present, bytes (number of
+ * displacement bytes that follow) and size (total length)
+ * addrbits - current default address size, 16 or 32; for a pure
+ * offset an explicit input->addr_size overrides it
+ * rfield - value placed in bits 3-5 of the ModRM byte
+ *
+ * Returns `output' on success, or NULL if the operand cannot be
+ * encoded (unknown register, illegal register combination, illegal
+ * scale, or a WORD/DWORD override that contradicts the registers).
+ */
+ static ea *process_ea (operand *input, ea *output, int addrbits, int rfield) {
+     if (!(REGISTER & ~input->type)) {  /* it's a single register */
+         /*
+          * Four register names share each r/m value, so the position
+          * in this table divided by four is the r/m field.
+          */
+         static int regs[] = {
+             R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL,
+             R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL,
+             R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH,
+             R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH
+         };
+         int i;
+
+         for (i=0; i<elements(regs); i++)
+             if (input->basereg == regs[i]) break;
+         if (i<elements(regs)) {
+             output->sib_present = FALSE;/* no SIB necessary */
+             output->bytes = 0;         /* no offset necessary either */
+             output->modrm = 0xC0 | (rfield << 3) | (i/4);
+         } else
+             return NULL;
+     } else {                           /* it's a memory reference */
+         if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
+             /* it's a pure offset */
+             if (input->addr_size)
+                 addrbits = input->addr_size;
+             output->sib_present = FALSE;
+             output->bytes = (addrbits==32 ? 4 : 2);
+             output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
+         } else {                       /* it's an indirection */
+             int i=input->indexreg, b=input->basereg, s=input->scale;
+             long o=input->offset, seg=input->segment;
+
+             if (s==0) i = -1;          /* make this easy, at least */
+
+             if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
+                 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
+                 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
+                 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
+                 /* it must be a 32-bit memory reference. Firstly we have
+                  * to check that all registers involved are type Exx. */
+                 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
+                     && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
+                     return NULL;
+                 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
+                     && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
+                     return NULL;
+
+                 /* While we're here, ensure the user didn't specify WORD. */
+                 if (input->addr_size == 16)
+                     return NULL;
+
+                 /* now reorganise base/index */
+                 if (b==i)              /* convert EAX+2*EAX to 3*EAX */
+                     b = -1, s++;
+                 if (b==-1 && s==1)     /* single register should be base */
+                     b = i, i = -1;
+                 if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1)
+                     b = i, s--;        /* convert 3*EAX to EAX+2*EAX */
+                 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
+                     return NULL;       /* wrong, for various reasons */
+
+                 if (i==-1 && b!=R_ESP) {/* no SIB needed */
+                     int mod, rm;
+                     switch(b) {
+                       case R_EAX: rm = 0; break;
+                       case R_ECX: rm = 1; break;
+                       case R_EDX: rm = 2; break;
+                       case R_EBX: rm = 3; break;
+                       case R_EBP: rm = 5; break;
+                       case R_ESI: rm = 6; break;
+                       case R_EDI: rm = 7; break;
+                       case -1: rm = 5; break;
+                       default:         /* should never happen */
+                         return NULL;
+                     }
+                     /*
+                      * mod=0 with rm=5 means "disp32, no base", so a
+                      * plain [EBP] has to be encoded with a displacement.
+                      * A relocatable offset (seg != NO_SEG) always gets
+                      * the full-size displacement.
+                      */
+                     if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG))
+                         mod = 0;
+                     else if (o>=-128 && o<=127 && seg==NO_SEG)
+                         mod = 1;
+                     else
+                         mod = 2;
+                     output->sib_present = FALSE;
+                     output->bytes = (b==-1 || mod==2 ? 4 : mod);
+                     output->modrm = (mod<<6) | (rfield<<3) | rm;
+                 } else {               /* we need a SIB */
+                     int mod, scale, index, base;
+
+                     switch (b) {
+                       case R_EAX: base = 0; break;
+                       case R_ECX: base = 1; break;
+                       case R_EDX: base = 2; break;
+                       case R_EBX: base = 3; break;
+                       case R_ESP: base = 4; break;
+                       case R_EBP: case -1: base = 5; break;
+                       case R_ESI: base = 6; break;
+                       case R_EDI: base = 7; break;
+                       default:         /* then what the smeg is it? */
+                         return NULL;   /* panic */
+                     }
+
+                     switch (i) {
+                       case R_EAX: index = 0; break;
+                       case R_ECX: index = 1; break;
+                       case R_EDX: index = 2; break;
+                       case R_EBX: index = 3; break;
+                       case -1: index = 4; break;
+                       case R_EBP: index = 5; break;
+                       case R_ESI: index = 6; break;
+                       case R_EDI: index = 7; break;
+                       default:         /* then what the smeg is it? */
+                         return NULL;   /* panic */
+                     }
+
+                     if (i==-1) s = 1;
+                     switch (s) {
+                       case 1: scale = 0; break;
+                       case 2: scale = 1; break;
+                       case 4: scale = 2; break;
+                       case 8: scale = 3; break;
+                       default:         /* then what the smeg is it? */
+                         return NULL;   /* panic */
+                     }
+
+                     if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG))
+                         mod = 0;
+                     else if (o>=-128 && o<=127 && seg==NO_SEG)
+                         mod = 1;
+                     else
+                         mod = 2;
+
+                     output->sib_present = TRUE;
+                     output->bytes = (b==-1 || mod==2 ? 4 : mod);
+                     output->modrm = (mod<<6) | (rfield<<3) | 4;
+                     output->sib = (scale<<6) | (index<<3) | base;
+                 }
+             } else {                   /* it's 16-bit */
+                 int mod, rm, tmp;
+
+                 /* check all registers are BX, BP, SI or DI */
+                 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
+                     (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
+                     return NULL;
+
+                 /* ensure the user didn't specify DWORD */
+                 if (input->addr_size == 32)
+                     return NULL;
+
+                 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
+                 /*
+                  * The swaps go through a temporary: the one-line XOR
+                  * swap `b ^= i ^= b ^= i' modifies b twice without an
+                  * intervening sequence point, which is undefined in C.
+                  */
+                 if (b==-1 && i!=-1) {  /* swap them round */
+                     tmp = b; b = i; i = tmp;
+                 }
+                 if ((b==R_SI || b==R_DI) && i!=-1) {
+                     /* have BX/BP as base, SI/DI index */
+                     tmp = b; b = i; i = tmp;
+                 }
+                 if (b==i) return NULL;/* shouldn't ever happen, in theory */
+                 if (i!=-1 && b!=-1 &&
+                     (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
+                     return NULL;       /* invalid combinations */
+                 if (b==-1)             /* pure offset: handled above */
+                     return NULL;       /* so if it gets to here, panic! */
+
+                 rm = -1;
+                 if (i!=-1)
+                     switch (i*256 + b) {
+                       case R_SI*256+R_BX: rm=0; break;
+                       case R_DI*256+R_BX: rm=1; break;
+                       case R_SI*256+R_BP: rm=2; break;
+                       case R_DI*256+R_BP: rm=3; break;
+                     }
+                 else
+                     switch (b) {
+                       case R_SI: rm=4; break;
+                       case R_DI: rm=5; break;
+                       case R_BP: rm=6; break;
+                       case R_BX: rm=7; break;
+                     }
+                 if (rm==-1)            /* can't happen, in theory */
+                     return NULL;       /* so panic if it does */
+
+                 /* mod=0 with rm=6 means [disp16], so [BP] needs a disp */
+                 if (o==0 && seg==NO_SEG && rm!=6)
+                     mod = 0;
+                 else if (o>=-128 && o<=127 && seg==NO_SEG)
+                     mod = 1;
+                 else
+                     mod = 2;
+
+                 output->sib_present = FALSE; /* no SIB - it's 16-bit */
+                 output->bytes = mod;   /* bytes of offset needed */
+                 output->modrm = (mod<<6) | (rfield<<3) | rm;
+             }
+         }
+     }
+     output->size = 1 + output->sib_present + output->bytes;
+     return output;
+ }
+
+ /*
+ * Report whether the memory operand `input' uses an address size
+ * different from the current default `addrbits' (16 or 32).
+ * Returns 1 if it does, 0 if it does not or if the operand is not a
+ * memory reference at all.
+ */
+ static int chsize (operand *input, int addrbits) {
+     static const int regs32[] = {
+         R_EAX, R_EBX, R_ECX, R_EDX, R_EBP, R_ESP, R_ESI, R_EDI
+     };
+     int base, index, uses32 = 0;
+     unsigned k;
+
+     if (MEMORY & ~input->type)         /* not a memory operand */
+         return 0;
+
+     base = input->basereg;
+     /* an index register with a zero scale does not count */
+     index = (input->scale == 0 ? -1 : input->indexreg);
+
+     /* pure offset: only an explicit size override can differ */
+     if (index == -1 && base == -1)
+         return (input->addr_size != 0 && input->addr_size != addrbits);
+
+     /* otherwise the registers involved fix the address size */
+     for (k = 0; k < sizeof(regs32) / sizeof(regs32[0]); k++)
+         if (index == regs32[k] || base == regs32[k])
+             uses32 = 1;
+
+     if (uses32)
+         return (addrbits==16);
+     return (addrbits==32);
+ }
diff --git a/assemble.h b/assemble.h
new file mode 100644
index 0000000..cb93a2c
--- /dev/null
+++ b/assemble.h
@@ -0,0 +1,17 @@
+/* assemble.h header file for assemble.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_ASSEMBLE_H
+#define NASM_ASSEMBLE_H
+
+long insn_size (long segment, long offset, int bits,
+ insn *instruction, efunc error);
+long assemble (long segment, long offset, int bits,
+ insn *instruction, struct ofmt *output, efunc error);
+
+#endif
diff --git a/disasm.c b/disasm.c
new file mode 100644
index 0000000..8ad263b
--- /dev/null
+++ b/disasm.c
@@ -0,0 +1,667 @@
+/* disasm.c where all the _work_ gets done in the Netwide Disassembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * initial version 27/iii/95 by Simon Tatham
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "nasm.h"
+#include "disasm.h"
+#include "sync.h"
+#include "insns.h"
+
+#include "names.c"
+
+extern struct itemplate **itable[];
+
+/*
+ * Flags that go into the `segment' field of `insn' structures
+ * during disassembly.
+ */
+#define SEG_RELATIVE 1
+#define SEG_32BIT 2
+#define SEG_RMREG 4
+#define SEG_DISP8 8
+#define SEG_DISP16 16
+#define SEG_DISP32 32
+#define SEG_NODISP 64
+#define SEG_SIGNED 128
+
+ /*
+ * Turn a register number taken from an instruction encoding into
+ * the R_xxx token of the register it denotes.
+ *
+ * regflags - operand flags of the matching template operand
+ * regval - register number; used to index the eight-entry tables
+ * below, so it must be in the range 0-7
+ *
+ * Operand types that name one fixed register (REG_AL, REG_AX, ...,
+ * FPU0) are tested first and ignore regval altogether; after that
+ * the register class selects a table. Returns 0 when no test
+ * matches, or when the table holds no register for that number
+ * (the 0 entries below).
+ * NOTE(review): the fixed-register tests presumably have to precede
+ * the class tests because their flag sets include the class flags -
+ * confirm against the definitions in nasm.h.
+ */
+ static int whichreg(long regflags, int regval) {
+ static int reg32[] = {
+ R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI };
+ static int reg16[] = {
+ R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI };
+ static int reg8[] = {
+ R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH };
+ static int sreg[] = {
+ R_ES, R_CS, R_SS, R_DS, R_FS, R_GS, 0, 0 };
+ static int creg[] = {
+ R_CR0, 0, R_CR2, R_CR3, R_CR4, 0, 0, 0 };
+ static int dreg[] = {
+ R_DR0, R_DR1, R_DR2, R_DR3, 0, 0, R_DR6, R_DR7 };
+ static int treg[] = {
+ 0, 0, 0, R_TR3, R_TR4, R_TR5, R_TR6, R_TR7 };
+ static int fpureg[] = {
+ R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7 };
+ static int mmxreg[] = {
+ R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7 };
+
+ /* operand types that can only ever be one particular register */
+ if (!(REG_AL & ~regflags))
+ return R_AL;
+ if (!(REG_AX & ~regflags))
+ return R_AX;
+ if (!(REG_EAX & ~regflags))
+ return R_EAX;
+ if (!(REG_DX & ~regflags))
+ return R_DX;
+ if (!(REG_CL & ~regflags))
+ return R_CL;
+ if (!(REG_CX & ~regflags))
+ return R_CX;
+ if (!(REG_ECX & ~regflags))
+ return R_ECX;
+ if (!(REG_CR4 & ~regflags))
+ return R_CR4;
+ if (!(FPU0 & ~regflags))
+ return R_ST0;
+ /* general registers, selected by operand size */
+ if (!((REGMEM|BITS8) & ~regflags))
+ return reg8[regval];
+ if (!((REGMEM|BITS16) & ~regflags))
+ return reg16[regval];
+ if (!((REGMEM|BITS32) & ~regflags))
+ return reg32[regval];
+ /* the remaining register classes */
+ if (!(REG_SREG & ~regflags))
+ return sreg[regval];
+ if (!(REG_CREG & ~regflags))
+ return creg[regval];
+ if (!(REG_DREG & ~regflags))
+ return dreg[regval];
+ if (!(REG_TREG & ~regflags))
+ return treg[regval];
+ if (!(FPUREG & ~regflags))
+ return fpureg[regval];
+ if (!(MMXREG & ~regflags))
+ return mmxreg[regval];
+ return 0;
+ }
+
+ /*
+ * Return the textual suffix for condition code `condval', the 0-15
+ * value decoded from an instruction's opcode byte.
+ *
+ * disasm() starts every instruction with the condition set to -1,
+ * so a value outside 0-15 means no condition was decoded; an empty
+ * string is returned in that case rather than indexing outside the
+ * table.
+ */
+ static char *whichcond(int condval) {
+     static int conds[] = {
+         C_O, C_NO, C_B, C_AE, C_E, C_NE, C_BE, C_A,
+         C_S, C_NS, C_PE, C_PO, C_L, C_GE, C_LE, C_G
+     };
+
+     if (condval < 0 || condval >= elements(conds))
+         return "";
+     return conditions[conds[condval]];
+ }
+
+/*
+ * Process an effective address (ModRM) specification.
+ */
+static unsigned char *do_ea (unsigned char *data, int modrm, int asize,
+ int segsize, operand *op) {
+ int mod, rm, scale, index, base;
+
+ mod = (modrm >> 6) & 03;
+ rm = modrm & 07;
+
+ if (mod == 3) { /* pure register version */
+ op->basereg = rm;
+ op->segment |= SEG_RMREG;
+ return data;
+ }
+
+ op->addr_size = 0;
+
+ if (asize == 16) {
+ /*
+ * <mod> specifies the displacement size (none, byte or
+ * word), and <rm> specifies the register combination.
+ * Exception: mod=0,rm=6 does not specify [BP] as one might
+ * expect, but instead specifies [disp16].
+ */
+ op->indexreg = op->basereg = -1;
+ op->scale = 1; /* always, in 16 bits */
+ switch (rm) {
+ case 0: op->basereg = R_BX; op->indexreg = R_SI; break;
+ case 1: op->basereg = R_BX; op->indexreg = R_DI; break;
+ case 2: op->basereg = R_BP; op->indexreg = R_SI; break;
+ case 3: op->basereg = R_BP; op->indexreg = R_DI; break;
+ case 4: op->basereg = R_SI; break;
+ case 5: op->basereg = R_DI; break;
+ case 6: op->basereg = R_BP; break;
+ case 7: op->basereg = R_BX; break;
+ }
+ if (rm == 6 && mod == 0) { /* special case */
+ op->basereg = -1;
+ if (segsize != 16)
+ op->addr_size = 16;
+ mod = 2; /* fake disp16 */
+ }
+ switch (mod) {
+ case 0:
+ op->segment |= SEG_NODISP;
+ break;
+ case 1:
+ op->segment |= SEG_DISP8;
+ op->offset = (signed char) *data++;
+ break;
+ case 2:
+ op->segment |= SEG_DISP16;
+ op->offset = *data++;
+ op->offset |= (*data++) << 8;
+ break;
+ }
+ return data;
+ } else {
+ /*
+ * Once again, <mod> specifies displacement size (this time
+ * none, byte or *dword*), while <rm> specifies the base
+ * register. Again, [EBP] is missing, replaced by a pure
+ * disp32 (this time that's mod=0,rm=*5*). However, rm=4
+ * indicates not a single base register, but instead the
+ * presence of a SIB byte...
+ */
+ op->indexreg = -1;
+ switch (rm) {
+ case 0: op->basereg = R_EAX; break;
+ case 1: op->basereg = R_ECX; break;
+ case 2: op->basereg = R_EDX; break;
+ case 3: op->basereg = R_EBX; break;
+ case 5: op->basereg = R_EBP; break;
+ case 6: op->basereg = R_ESI; break;
+ case 7: op->basereg = R_EDI; break;
+ }
+ if (rm == 5 && mod == 0) {
+ op->basereg = -1;
+ if (segsize != 32)
+ op->addr_size = 32;
+ mod = 2; /* fake disp32 */
+ }
+ if (rm == 4) { /* process SIB */
+ scale = (*data >> 6) & 03;
+ index = (*data >> 3) & 07;
+ base = *data & 07;
+ data++;
+
+ op->scale = 1 << scale;
+ switch (index) {
+ case 0: op->indexreg = R_EAX; break;
+ case 1: op->indexreg = R_ECX; break;
+ case 2: op->indexreg = R_EDX; break;
+ case 3: op->indexreg = R_EBX; break;
+ case 4: op->indexreg = -1; break;
+ case 5: op->indexreg = R_EBP; break;
+ case 6: op->indexreg = R_ESI; break;
+ case 7: op->indexreg = R_EDI; break;
+ }
+
+ switch (base) {
+ case 0: op->basereg = R_EAX; break;
+ case 1: op->basereg = R_ECX; break;
+ case 2: op->basereg = R_EDX; break;
+ case 3: op->basereg = R_EBX; break;
+ case 4: op->basereg = R_ESP; break;
+ case 6: op->basereg = R_ESI; break;
+ case 7: op->basereg = R_EDI; break;
+ case 5:
+ if (mod == 0) {
+ mod = 2;
+ op->basereg = -1;
+ } else
+ op->basereg = R_EBP;
+ break;
+ }
+ }
+ switch (mod) {
+ case 0:
+ op->segment |= SEG_NODISP;
+ break;
+ case 1:
+ op->segment |= SEG_DISP8;
+ op->offset = (signed char) *data++;
+ break;
+ case 2:
+ op->segment |= SEG_DISP32;
+ op->offset = *data++;
+ op->offset |= (*data++) << 8;
+ op->offset |= ((long) *data++) << 16;
+ op->offset |= ((long) *data++) << 24;
+ break;
+ }
+ return data;
+ }
+}
+
+/*
+ * Determine whether the code string in r corresponds to the data
+ * stream in data. Return the number of bytes matched if so.
+ */
+static int matches (unsigned char *r, unsigned char *data, int asize,
+ int osize, int segsize, insn *ins) {
+ unsigned char *origdata = data;
+ int a_used = FALSE, o_used = FALSE;
+
+ while (*r) {
+ int c = *r++;
+ if (c >= 01 && c <= 03) {
+ while (c--)
+ if (*r++ != *data++)
+ return FALSE;
+ }
+ if (c == 04) {
+ switch (*data++) {
+ case 0x07: ins->oprs[0].basereg = 0; break;
+ case 0x17: ins->oprs[0].basereg = 2; break;
+ case 0x1F: ins->oprs[0].basereg = 3; break;
+ default: return FALSE;
+ }
+ }
+ if (c == 05) {
+ switch (*data++) {
+ case 0xA1: ins->oprs[0].basereg = 4; break;
+ case 0xA9: ins->oprs[0].basereg = 5; break;
+ default: return FALSE;
+ }
+ }
+ if (c == 06) {
+ switch (*data++) {
+ case 0x06: ins->oprs[0].basereg = 0; break;
+ case 0x0E: ins->oprs[0].basereg = 1; break;
+ case 0x16: ins->oprs[0].basereg = 2; break;
+ case 0x1E: ins->oprs[0].basereg = 3; break;
+ default: return FALSE;
+ }
+ }
+ if (c == 07) {
+ switch (*data++) {
+ case 0xA0: ins->oprs[0].basereg = 4; break;
+ case 0xA8: ins->oprs[0].basereg = 5; break;
+ default: return FALSE;
+ }
+ }
+ if (c >= 010 && c <= 012) {
+ int t = *r++, d = *data++;
+ if (d < t || d > t+7)
+ return FALSE;
+ else {
+ ins->oprs[c-010].basereg = d-t;
+ ins->oprs[c-010].segment |= SEG_RMREG;
+ }
+ }
+ if (c == 017)
+ if (*data++)
+ return FALSE;
+ if (c >= 014 && c <= 016) {
+ ins->oprs[c-014].offset = (signed char) *data++;
+ ins->oprs[c-014].segment |= SEG_SIGNED;
+ }
+ if (c >= 020 && c <= 022)
+ ins->oprs[c-020].offset = *data++;
+ if (c >= 024 && c <= 026)
+ ins->oprs[c-024].offset = *data++;
+ if (c >= 030 && c <= 032) {
+ ins->oprs[c-030].offset = *data++;
+ ins->oprs[c-030].offset |= (*data++ << 8);
+ }
+ if (c >= 034 && c <= 036) {
+ ins->oprs[c-034].offset = *data++;
+ ins->oprs[c-034].offset |= (*data++ << 8);
+ if (asize == 32) {
+ ins->oprs[c-034].offset |= (((long) *data++) << 16);
+ ins->oprs[c-034].offset |= (((long) *data++) << 24);
+ }
+ if (segsize != asize)
+ ins->oprs[c-034].addr_size = asize;
+ }
+ if (c >= 040 && c <= 042) {
+ ins->oprs[c-040].offset = *data++;
+ ins->oprs[c-040].offset |= (*data++ << 8);
+ ins->oprs[c-040].offset |= (((long) *data++) << 16);
+ ins->oprs[c-040].offset |= (((long) *data++) << 24);
+ }
+ if (c >= 050 && c <= 052) {
+ ins->oprs[c-050].offset = (signed char) *data++;
+ ins->oprs[c-050].segment |= SEG_RELATIVE;
+ }
+ if (c >= 060 && c <= 062) {
+ ins->oprs[c-060].offset = *data++;
+ ins->oprs[c-060].offset |= (*data++ << 8);
+ ins->oprs[c-060].segment |= SEG_RELATIVE;
+ ins->oprs[c-060].segment &= ~SEG_32BIT;
+ }
+ if (c >= 064 && c <= 066) {
+ ins->oprs[c-064].offset = *data++;
+ ins->oprs[c-064].offset |= (*data++ << 8);
+ if (asize == 32) {
+ ins->oprs[c-064].offset |= (((long) *data++) << 16);
+ ins->oprs[c-064].offset |= (((long) *data++) << 24);
+ ins->oprs[c-064].segment |= SEG_32BIT;
+ } else
+ ins->oprs[c-064].segment &= ~SEG_32BIT;
+ ins->oprs[c-064].segment |= SEG_RELATIVE;
+ if (segsize != asize)
+ ins->oprs[c-064].addr_size = asize;
+ }
+ if (c >= 070 && c <= 072) {
+ ins->oprs[c-070].offset = *data++;
+ ins->oprs[c-070].offset |= (*data++ << 8);
+ ins->oprs[c-070].offset |= (((long) *data++) << 16);
+ ins->oprs[c-070].offset |= (((long) *data++) << 24);
+ ins->oprs[c-070].segment |= SEG_32BIT | SEG_RELATIVE;
+ }
+ if (c >= 0100 && c <= 0177) {
+ int modrm = *data++;
+ ins->oprs[c & 07].basereg = (modrm >> 3) & 07;
+ ins->oprs[c & 07].segment |= SEG_RMREG;
+ data = do_ea (data, modrm, asize, segsize,
+ &ins->oprs[(c >> 3) & 07]);
+ }
+ if (c >= 0200 && c <= 0277) {
+ int modrm = *data++;
+ if (((modrm >> 3) & 07) != (c & 07))
+ return FALSE; /* spare field doesn't match up */
+ data = do_ea (data, modrm, asize, segsize,
+ &ins->oprs[(c >> 3) & 07]);
+ }
+ if (c >= 0300 && c <= 0302) {
+ if (asize)
+ ins->oprs[c-0300].segment |= SEG_32BIT;
+ else
+ ins->oprs[c-0300].segment &= ~SEG_32BIT;
+ a_used = TRUE;
+ }
+ if (c == 0310) {
+ if (asize == 32)
+ return FALSE;
+ else
+ a_used = TRUE;
+ }
+ if (c == 0311) {
+ if (asize == 16)
+ return FALSE;
+ else
+ a_used = TRUE;
+ }
+ if (c == 0312) {
+ if (asize != segsize)
+ return FALSE;
+ else
+ a_used = TRUE;
+ }
+ if (c == 0320) {
+ if (osize == 32)
+ return FALSE;
+ else
+ o_used = TRUE;
+ }
+ if (c == 0321) {
+ if (osize == 16)
+ return FALSE;
+ else
+ o_used = TRUE;
+ }
+ if (c == 0322) {
+ if (osize != segsize)
+ return FALSE;
+ else
+ o_used = TRUE;
+ }
+ if (c == 0330) {
+ int t = *r++, d = *data++;
+ if (d < t || d > t+15)
+ return FALSE;
+ else
+ ins->condition = d - t;
+ }
+ }
+
+ /*
+ * Check for unused a/o prefixes.
+ */
+ ins->nprefix = 0;
+ if (!a_used && asize != segsize)
+ ins->prefixes[ins->nprefix++] = (asize == 16 ? P_A16 : P_A32);
+ if (!o_used && osize != segsize)
+ ins->prefixes[ins->nprefix++] = (osize == 16 ? P_O16 : P_O32);
+
+ return data - origdata;
+}
+
+ /*
+ * Disassemble one instruction.
+ *
+ * data - bytes to decode, starting with any prefixes
+ * output - buffer that receives the text of the instruction; it is
+ * written with sprintf() and no length is checked, so the
+ * caller must make it large enough
+ * segsize - default operand/address size of the code, 16 or 32
+ * offset - address of the instruction, used to turn relative
+ * displacements into target addresses
+ * autosync - if non-zero, every relative target is passed to
+ * add_sync()
+ *
+ * Returns the length of the instruction in bytes, prefixes
+ * included, or 0 if no instruction template matched.
+ */
+ long disasm (unsigned char *data, char *output, int segsize, long offset,
+              int autosync) {
+     struct itemplate **p;
+     int length = 0;
+     char *segover;
+     int rep, lock, asize, osize, i, slen, colon;
+     unsigned char *origdata;
+     int works;
+     insn ins;
+
+     /*
+      * Scan for prefixes.
+      */
+     asize = osize = segsize;
+     segover = NULL;
+     rep = lock = 0;
+     origdata = data;
+     for (;;) {
+         if (*data == 0xF3 || *data == 0xF2)
+             rep = *data++;
+         else if (*data == 0xF0)
+             lock = *data++;
+         else if (*data == 0x2E || *data == 0x36 || *data == 0x3E ||
+                  *data == 0x26 || *data == 0x64 || *data == 0x65) {
+             switch (*data++) {
+               case 0x2E: segover = "cs"; break;
+               case 0x36: segover = "ss"; break;
+               case 0x3E: segover = "ds"; break;
+               case 0x26: segover = "es"; break;
+               case 0x64: segover = "fs"; break;
+               case 0x65: segover = "gs"; break;
+             }
+         } else if (*data == 0x66)
+             osize = 48 - segsize, data++;   /* 16 <-> 32 */
+         else if (*data == 0x67)
+             asize = 48 - segsize, data++;   /* 16 <-> 32 */
+         else
+             break;
+     }
+
+     works = TRUE;
+     for (p = itable[*data]; *p; p++) {
+         /*
+          * matches() writes into `ins' even when it ends up rejecting
+          * a template, so start every attempt from a clean state;
+          * otherwise a flag such as SEG_RMREG left behind by a failed
+          * attempt could spoil the r/m type check of a later one.
+          * addr_size starts at 0 ("not specified"): it holds an
+          * address size, not a SEG_xxx flag.
+          */
+         for (i = 0; i < 3; i++) {
+             ins.oprs[i].segment = (segsize == 16 ? 0 : SEG_32BIT);
+             ins.oprs[i].addr_size = 0;
+         }
+         ins.condition = -1;
+
+         if ( (length = matches((unsigned char *)((*p)->code), data,
+                                asize, osize, segsize, &ins)) ) {
+             works = TRUE;
+             /*
+              * Final check to make sure the types of r/m match up.
+              */
+             for (i = 0; i < (*p)->operands; i++)
+                 if (((ins.oprs[i].segment & SEG_RMREG) &&
+                      !(MEMORY & ~(*p)->opd[i])) ||
+                     (!(ins.oprs[i].segment & SEG_RMREG) &&
+                      !(REGNORM & ~(*p)->opd[i]) &&
+                      !((*p)->opd[i] & REG_SMASK)))
+                     works = FALSE;
+             if (works)
+                 break;
+         }
+     }
+     if (!length || !works)
+         return 0;                      /* no instruction was matched */
+
+     slen = 0;
+
+     if (rep) {
+         /* F2 is "repne"; F3 is "repe" on compare/scan, else "rep" */
+         slen += sprintf(output+slen, "rep%s ",
+                         (rep == 0xF2 ? "ne" :
+                          (*p)->opcode == I_CMPSB ||
+                          (*p)->opcode == I_CMPSW ||
+                          (*p)->opcode == I_CMPSD ||
+                          (*p)->opcode == I_SCASB ||
+                          (*p)->opcode == I_SCASW ||
+                          (*p)->opcode == I_SCASD ? "e" : ""));
+     }
+     if (lock)
+         slen += sprintf(output+slen, "lock ");
+     for (i = 0; i < ins.nprefix; i++)
+         switch (ins.prefixes[i]) {
+           case P_A16: slen += sprintf(output+slen, "a16 "); break;
+           case P_A32: slen += sprintf(output+slen, "a32 "); break;
+           case P_O16: slen += sprintf(output+slen, "o16 "); break;
+           case P_O32: slen += sprintf(output+slen, "o32 "); break;
+         }
+
+     /* conditional instructions: base name plus condition suffix */
+     for (i = 0; i < elements(ico); i++)
+         if ((*p)->opcode == ico[i]) {
+             slen += sprintf(output+slen, "%s%s", icn[i],
+                             whichcond(ins.condition));
+             break;
+         }
+     if (i >= elements(ico))
+         slen += sprintf(output+slen, "%s", insn_names[(*p)->opcode]);
+     colon = FALSE;
+     length += data - origdata;         /* fix up for prefixes */
+     for (i=0; i<(*p)->operands; i++) {
+         output[slen++] = (colon ? ':' : i==0 ? ' ' : ',');
+
+         if (ins.oprs[i].segment & SEG_RELATIVE) {
+             ins.oprs[i].offset += offset + length;
+             /*
+              * sort out wraparound
+              */
+             if (!(ins.oprs[i].segment & SEG_32BIT))
+                 ins.oprs[i].offset &= 0xFFFF;
+             /*
+              * add sync marker, if autosync is on
+              */
+             if (autosync)
+                 add_sync (ins.oprs[i].offset, 0L);
+         }
+
+         if ((*p)->opd[i] & COLON)
+             colon = TRUE;
+         else
+             colon = FALSE;
+
+         if (((*p)->opd[i] & (REGISTER | FPUREG)) ||
+             (ins.oprs[i].segment & SEG_RMREG)) {
+             /* basereg holds a raw register number up to this point */
+             ins.oprs[i].basereg = whichreg ((*p)->opd[i],
+                                             ins.oprs[i].basereg);
+             slen += sprintf(output+slen, "%s",
+                             reg_names[ins.oprs[i].basereg]);
+         } else if (!(UNITY & ~(*p)->opd[i])) {
+             output[slen++] = '1';
+         } else if ( (*p)->opd[i] & IMMEDIATE ) {
+             if ( (*p)->opd[i] & BITS8 ) {
+                 slen += sprintf(output+slen, "byte ");
+                 if (ins.oprs[i].segment & SEG_SIGNED) {
+                     if (ins.oprs[i].offset < 0) {
+                         ins.oprs[i].offset *= -1;
+                         output[slen++] = '-';
+                     } else
+                         output[slen++] = '+';
+                 }
+             } else if ( (*p)->opd[i] & BITS16 ) {
+                 slen += sprintf(output+slen, "word ");
+             } else if ( (*p)->opd[i] & BITS32 ) {
+                 slen += sprintf(output+slen, "dword ");
+             } else if ( (*p)->opd[i] & NEAR ) {
+                 slen += sprintf(output+slen, "near ");
+             } else if ( (*p)->opd[i] & SHORT ) {
+                 slen += sprintf(output+slen, "short ");
+             }
+             slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset);
+         } else if ( !(MEM_OFFS & ~(*p)->opd[i]) ) {
+             slen += sprintf(output+slen, "[%s%s%s0x%lx]",
+                             (segover ? segover : ""),
+                             (segover ? ":" : ""),
+                             (ins.oprs[i].addr_size == 32 ? "dword " :
+                              ins.oprs[i].addr_size == 16 ? "word " : ""),
+                             ins.oprs[i].offset);
+             segover = NULL;
+         } else if ( !(REGMEM & ~(*p)->opd[i]) ) {
+             int started = FALSE;
+             if ( (*p)->opd[i] & BITS8 )
+                 slen += sprintf(output+slen, "byte ");
+             if ( (*p)->opd[i] & BITS16 )
+                 slen += sprintf(output+slen, "word ");
+             if ( (*p)->opd[i] & BITS32 )
+                 slen += sprintf(output+slen, "dword ");
+             if ( (*p)->opd[i] & BITS64 )
+                 slen += sprintf(output+slen, "qword ");
+             if ( (*p)->opd[i] & BITS80 )
+                 slen += sprintf(output+slen, "tword ");
+             if ( (*p)->opd[i] & FAR )
+                 slen += sprintf(output+slen, "far ");
+             if ( (*p)->opd[i] & NEAR )
+                 slen += sprintf(output+slen, "near ");
+             output[slen++] = '[';
+             if (ins.oprs[i].addr_size)
+                 slen += sprintf(output+slen, "%s",
+                                 (ins.oprs[i].addr_size == 32 ? "dword " :
+                                  ins.oprs[i].addr_size == 16 ? "word " : ""));
+             if (segover) {
+                 slen += sprintf(output+slen, "%s:", segover);
+                 segover = NULL;
+             }
+             if (ins.oprs[i].basereg != -1) {
+                 slen += sprintf(output+slen, "%s",
+                                 reg_names[ins.oprs[i].basereg]);
+                 started = TRUE;
+             }
+             if (ins.oprs[i].indexreg != -1) {
+                 if (started)
+                     output[slen++] = '+';
+                 slen += sprintf(output+slen, "%s",
+                                 reg_names[ins.oprs[i].indexreg]);
+                 if (ins.oprs[i].scale > 1)
+                     slen += sprintf(output+slen, "*%d", ins.oprs[i].scale);
+                 started = TRUE;
+             }
+             if (ins.oprs[i].segment & SEG_DISP8) {
+                 int sign = '+';
+                 if (ins.oprs[i].offset & 0x80) {
+                     ins.oprs[i].offset = - (signed char) ins.oprs[i].offset;
+                     sign = '-';
+                 }
+                 slen += sprintf(output+slen, "%c0x%lx", sign,
+                                 ins.oprs[i].offset);
+             } else if (ins.oprs[i].segment & SEG_DISP16) {
+                 if (started)
+                     output[slen++] = '+';
+                 slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset);
+             } else if (ins.oprs[i].segment & SEG_DISP32) {
+                 if (started)
+                     output[slen++] = '+';
+                 slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset);
+             }
+             output[slen++] = ']';
+         } else {
+             slen += sprintf(output+slen, "<operand%d>", i);
+         }
+     }
+     output[slen] = '\0';
+     if (segover) {                     /* unused segment override */
+         /*
+          * Move the whole string, terminator included, up by three
+          * characters and put the segment name and a space in front.
+          */
+         char *text = output;
+         int count = slen+1;
+         while (count--)
+             text[count+3] = text[count];
+         strncpy (output, segover, 2);
+         output[2] = ' ';
+     }
+     return length;
+ }
+
+ /*
+ * Fallback for a byte that could not be disassembled: write it to
+ * `output' as a "db" directive with a two-digit upper-case hex
+ * value, and report that one byte was consumed.
+ */
+ long eatbyte (unsigned char *data, char *output) {
+     unsigned int value = *data;
+
+     sprintf(output, "db 0x%02X", value);
+     return 1;
+ }
diff --git a/disasm.h b/disasm.h
new file mode 100644
index 0000000..845fd2e
--- /dev/null
+++ b/disasm.h
@@ -0,0 +1,18 @@
+/* disasm.h header file for disasm.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_DISASM_H
+#define NASM_DISASM_H
+
+#define INSN_MAX 32 /* one instruction can't be longer than this */
+
+/*
+ * Disassemble from the bytes at `data', writing the instruction text
+ * to `output' as a NUL-terminated string, and return a length.
+ * NOTE(review): the length is presumably the number of bytes of
+ * `data' used by the instruction, and `segsize', `offset' and
+ * `autosync' are interpreted by disasm.c; confirm both there.
+ */
+long disasm (unsigned char *data, char *output, int segsize, long offset,
+ int autosync);
+/*
+ * Write the byte at `data' to `output' as a "db 0xNN" directive and
+ * return 1.
+ */
+long eatbyte (unsigned char *data, char *output);
+
+#endif
diff --git a/float.c b/float.c
new file mode 100644
index 0000000..e9b7f4a
--- /dev/null
+++ b/float.c
@@ -0,0 +1,389 @@
+/* float.c floating-point constant support for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * initial version 13/ix/96 by Simon Tatham
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nasm.h"
+
+#define TRUE 1
+#define FALSE 0
+
+#define MANT_WORDS 6 /* 64 bits + 32 for accuracy == 96 */
+#define MANT_DIGITS 28 /* 29 digits don't fit in 96 bits */
+
+/*
+ * Multiply the MANT_WORDS-word fraction `to' by the fraction `from'
+ * and leave the most significant MANT_WORDS words of the product in
+ * `to'. Word 0 is the most significant word of each operand.
+ *
+ * It is guaranteed that the top bit of `from' is set, so the product
+ * needs at most _one_ bit shift to the left to bring its own top bit
+ * back into place. The return value reports that shift: 0 when none
+ * was needed, -1 when the product was shifted up by one bit (callers
+ * add this to their running power of two).
+ */
+static int multiply(unsigned short *to, unsigned short *from) {
+    unsigned long acc[MANT_WORDS*2];
+    unsigned long prod;
+    int a, b, k;
+
+    for (k = 0; k < MANT_WORDS*2; k++)
+        acc[k] = 0;
+
+    /*
+     * Schoolbook multiplication: every 16x16 partial product is split
+     * into its high and low halves, which are summed into adjacent
+     * accumulator words.
+     */
+    for (a = 0; a < MANT_WORDS; a++) {
+        for (b = 0; b < MANT_WORDS; b++) {
+            prod = (unsigned long)to[a] * (unsigned long)from[b];
+            acc[a+b] += prod >> 16;
+            acc[a+b+1] += prod & 0xFFFF;
+        }
+    }
+
+    /* ripple the carries from the least significant word upwards */
+    for (k = MANT_WORDS*2 - 1; k > 0; k--) {
+        acc[k-1] += acc[k] >> 16;
+        acc[k] &= 0xFFFF;
+    }
+
+    if (!(acc[0] & 0x8000)) {
+        /* top bit clear: renormalise by one bit, pulling in the
+         * top bit of the following word */
+        for (k = 0; k < MANT_WORDS; k++)
+            to[k] = (acc[k] << 1) + ((acc[k+1] & 0x8000) ? 1 : 0);
+        return -1;
+    }
+
+    for (k = 0; k < MANT_WORDS; k++)
+        to[k] = acc[k] & 0xFFFF;
+    return 0;
+}
+
+/*
+ * Convert the decimal floating-point text in `string' into a binary
+ * mantissa and exponent.
+ *
+ * Accepted syntax: decimal digits with at most one `.', optionally
+ * followed by `E' or `e' and a decimal exponent (read with atoi).
+ * Leading zeros are skipped; at most MANT_DIGITS significant digits
+ * are kept and any further digits are dropped.
+ *
+ * On return `mant' holds MANT_WORDS words of a base-2^16 fraction,
+ * most significant word first, which is either zero or lies in
+ * [0.5,1) (top bit of mant[0] set), and `*exponent' holds the power
+ * of two by which that fraction must be multiplied.
+ *
+ * A malformed number is reported on stderr. In that case `mant' and
+ * `*exponent' are both left as zero, so callers see a well-defined
+ * zero result rather than reading uninitialised storage.
+ */
+static void flconvert(char *string, unsigned short *mant, long *exponent) {
+    char digits[MANT_DIGITS], *p, *q, *r;
+    unsigned short mult[MANT_WORDS], *m, bit;
+    long tenpwr, twopwr;
+    int extratwos, started, seendot;
+
+    /*
+     * Define both outputs before anything can fail: the error
+     * returns below would otherwise hand the caller an array it
+     * never initialised.
+     */
+    for (m=mant; m<mant+MANT_WORDS; m++)
+        *m = 0;
+    *exponent = 0;
+
+    p = digits;
+    tenpwr = 0;
+    started = seendot = FALSE;
+    while (*string && *string != 'E' && *string != 'e') {
+        if (*string == '.') {
+            if (!seendot)
+                seendot = TRUE;
+            else {
+                fprintf(stderr, "too many periods!\n");
+                return;
+            }
+        } else if (*string >= '0' && *string <= '9') {
+            if (*string == '0' && !started) {
+                /* leading zero: only matters after the point */
+                if (seendot)
+                    tenpwr--;
+            } else {
+                started = TRUE;
+                if (p < digits+sizeof(digits))
+                    *p++ = *string - '0';
+                if (!seendot)
+                    tenpwr++;
+            }
+        } else {
+            fprintf(stderr, "`%c' is invalid char\n", *string);
+            return;
+        }
+        string++;
+    }
+    if (*string) {
+        string++;                      /* eat the E */
+        tenpwr += atoi(string);
+    }
+
+    /*
+     * At this point, the memory interval [digits,p) contains a
+     * series of decimal digits zzzzzzz such that our number X
+     * satisfies
+     *
+     * X = 0.zzzzzzz * 10^tenpwr
+     */
+
+    bit = 0x8000;
+    m = mant;                          /* already zeroed above */
+    q = digits;
+    started = FALSE;
+    twopwr = 0;
+    while (m < mant+MANT_WORDS) {
+        unsigned short carry = 0;
+        /* drop trailing zero digits; stop once nothing is left */
+        while (p > q && !p[-1])
+            p--;
+        if (p <= q)
+            break;
+        /* double the decimal fraction in place; the carry out of
+         * the top digit is the next binary digit */
+        for (r = p; r-- > q ;) {
+            int i;
+
+            i = 2 * *r + carry;
+            if (i >= 10)
+                carry = 1, i -= 10;
+            else
+                carry = 0;
+            *r = i;
+        }
+        if (carry)
+            *m |= bit, started = TRUE;
+        if (started) {
+            if (bit == 1)
+                bit = 0x8000, m++;
+            else
+                bit >>= 1;
+        } else
+            twopwr--;                  /* leading binary zero */
+    }
+    twopwr += tenpwr;
+
+    /*
+     * At this point the `mant' array contains the first six
+     * fractional places of a base-2^16 real number, which when
+     * multiplied by 2^twopwr and 5^tenpwr gives X. So now we
+     * really do multiply by 5^tenpwr.
+     */
+
+    if (tenpwr < 0) {
+        /* 0xCCCC... is 0.8 = (1/5) * 2^2 */
+        for (m=mult; m<mult+MANT_WORDS; m++)
+            *m = 0xCCCC;
+        extratwos = -2;
+        tenpwr = -tenpwr;
+    } else if (tenpwr > 0) {
+        /* 0xA000 is 0.625 = 5 * 2^-3 */
+        mult[0] = 0xA000;
+        for (m=mult+1; m<mult+MANT_WORDS; m++)
+            *m = 0;
+        extratwos = 3;
+    } else
+        extratwos = 0;
+    /* square-and-multiply over the bits of tenpwr */
+    while (tenpwr) {
+        if (tenpwr & 1)
+            twopwr += extratwos + multiply (mant, mult);
+        extratwos = extratwos * 2 + multiply (mult, mult);
+        tenpwr >>= 1;
+    }
+
+    /*
+     * Conversion is done. The elements of `mant' contain the first
+     * fractional places of a base-2^16 real number in [0.5,1)
+     * which we can multiply by 2^twopwr to get X. Or, of course,
+     * it contains zero.
+     */
+    *exponent = twopwr;
+}
+
+/*
+ * Shift a mantissa to the right by i (0 <= i < 16) bits.
+ *
+ * `mant' is MANT_WORDS words long, most significant word first. Bits
+ * shifted out of one word move into the top of the next; bits shifted
+ * out of the last word are discarded. A shift of 0 leaves the
+ * mantissa unchanged.
+ */
+static void shr(unsigned short *mant, int i) {
+    unsigned short n = 0, m;
+    int j;
+
+    for (j=0; j<MANT_WORDS; j++) {
+        /*
+         * Widen before shifting left: when i == 0 the shift count
+         * is 16, which is not a valid operation on the promoted
+         * operand (full-width shift where int is 16 bits, signed
+         * overflow where it is 32 bits and the top bit is set).
+         */
+        m = (unsigned short) (((unsigned long) mant[j] << (16-i)) & 0xFFFF);
+        mant[j] = (unsigned short) ((mant[j] >> i) | n);
+        n = m;
+    }
+}
+
+/*
+ * Round a mantissa off after i words (i >= 1).
+ *
+ * If the top bit of mant[i] is set, the i words before it are
+ * incremented as one big-endian number, the carry moving towards
+ * mant[0] for as long as words wrap round to zero. Words from mant[i]
+ * onwards are left as they are. Returns 1 only when the carry ran off
+ * the top (mant[0] wrapped to zero), and 0 otherwise.
+ */
+static int round(unsigned short *mant, int i) {
+    if (!(mant[i] & 0x8000))
+        return 0;                      /* below one half: truncate */
+
+    for (;;) {
+        i--;
+        mant[i] = (mant[i] + 1) & 0xFFFF;
+        if (i <= 0 || mant[i] != 0)
+            break;                     /* carry absorbed, or at the top */
+    }
+    return i == 0 && mant[i] == 0;
+}
+
+/*
+ * Store b into two consecutive elements at a: a[0] receives b and
+ * a[1] receives b>>8. With the `unsigned char *' destinations used in
+ * this file, that writes the low 16 bits of b least significant byte
+ * first. Both arguments are expanded more than once, so they must be
+ * free of side effects.
+ */
+#define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) )
+
+/*
+ * Convert the decimal text `str' to an 8-byte double-precision
+ * constant (1 sign bit, 11 exponent bits biased by 1023, 52 fraction
+ * bits) stored least significant byte first in `result'.
+ *
+ * A negative `sign' sets the sign bit of non-zero results. Values
+ * too small even for a denormal become zero. Returns 1 on success;
+ * on overflow, reports it through `error' and returns 0.
+ */
+static int to_double(char *str, long sign, unsigned char *result,
+                     efunc error) {
+    unsigned short mant[MANT_WORDS];
+    long exponent;
+
+    sign = (sign < 0 ? 0x8000L : 0L);
+
+    flconvert (str, mant, &exponent);
+    if (mant[0] & 0x8000) {
+        /*
+         * Non-zero. flconvert gives a fraction in [0.5,1); from here
+         * on `exponent' is that of the form 1.xxx * 2^exponent.
+         */
+        exponent--;
+        if (exponent >= -1022 && exponent <= 1023) {
+            /*
+             * Normalised. 1023 is the largest finite exponent: a
+             * biased exponent of 2047 encodes infinity or NaN.
+             */
+            exponent += 1023;
+            shr(mant, 11);
+            round(mant, 4);
+            if (mant[0] & 0x20)        /* did we scale up by one? */
+                shr(mant, 1), exponent++;
+            if (exponent >= 2047) {
+                /* rounding carried us past the largest finite value */
+                error(ERR_NONFATAL, "overflow in floating-point constant");
+                return 0;
+            }
+            mant[0] &= 0xF;            /* remove leading one */
+            put(result+6,(exponent << 4) | mant[0] | sign);
+            put(result+4,mant[1]);
+            put(result+2,mant[2]);
+            put(result+0,mant[3]);
+        } else if (exponent < -1022 && exponent >= -1074) {
+            /*
+             * Denormal. The fraction field is a plain fixed-point
+             * number, so shift the mantissa all the way into place
+             * first and then round. A carry out of the fraction lands
+             * in the bottom bit of the exponent field, which is
+             * exactly the encoding of the smallest normal number, so
+             * no correction is needed afterwards.
+             */
+            int shift = -(exponent+1011);
+            int sh = shift % 16, wds = shift / 16;
+            int k;
+
+            shr(mant, sh);
+            if (wds > 0) {
+                for (k = MANT_WORDS-1; k >= wds; k--)
+                    mant[k] = mant[k-wds];
+                for (; k >= 0; k--)
+                    mant[k] = 0;
+            }
+            round(mant, 4);
+            put(result+6,mant[0] | sign);
+            put(result+4,mant[1]);
+            put(result+2,mant[2]);
+            put(result+0,mant[3]);
+        } else {
+            if (exponent > 0) {
+                error(ERR_NONFATAL, "overflow in floating-point constant");
+                return 0;
+            } else
+                memset (result, 0, 8); /* underflow to zero */
+        }
+    } else {
+        /*
+         * Zero.
+         */
+        memset (result, 0, 8);
+    }
+    return 1;                          /* success */
+}
+
+/*
+ * Convert the decimal text `str' to a 4-byte single-precision
+ * constant (1 sign bit, 8 exponent bits biased by 127, 23 fraction
+ * bits) stored least significant byte first in `result'.
+ *
+ * A negative `sign' sets the sign bit of non-zero results. Values
+ * too small even for a denormal become zero. Returns 1 on success;
+ * on overflow, reports it through `error' and returns 0.
+ */
+static int to_float(char *str, long sign, unsigned char *result,
+                    efunc error) {
+    unsigned short mant[MANT_WORDS];
+    long exponent;
+
+    sign = (sign < 0 ? 0x8000L : 0L);
+
+    flconvert (str, mant, &exponent);
+    if (mant[0] & 0x8000) {
+        /*
+         * Non-zero. flconvert gives a fraction in [0.5,1); from here
+         * on `exponent' is that of the form 1.xxx * 2^exponent.
+         */
+        exponent--;
+        if (exponent >= -126 && exponent <= 127) {
+            /*
+             * Normalised. 127 is the largest finite exponent: a
+             * biased exponent of 255 encodes infinity or NaN.
+             */
+            exponent += 127;
+            shr(mant, 8);
+            round(mant, 2);
+            if (mant[0] & 0x100)       /* did we scale up by one? */
+                shr(mant, 1), exponent++;
+            if (exponent >= 255) {
+                /* rounding carried us past the largest finite value */
+                error(ERR_NONFATAL, "overflow in floating-point constant");
+                return 0;
+            }
+            mant[0] &= 0x7F;           /* remove leading one */
+            put(result+2,(exponent << 7) | mant[0] | sign);
+            put(result+0,mant[1]);
+        } else if (exponent < -126 && exponent >= -149) {
+            /*
+             * Denormal. The fraction field is a plain fixed-point
+             * number, so shift the mantissa all the way into place
+             * first and then round. A carry out of the fraction lands
+             * in the bottom bit of the exponent field, which is
+             * exactly the encoding of the smallest normal number, so
+             * no correction is needed afterwards.
+             */
+            int shift = -(exponent+118);
+            int sh = shift % 16, wds = shift / 16;
+            int k;
+
+            shr(mant, sh);
+            if (wds > 0) {
+                for (k = MANT_WORDS-1; k >= wds; k--)
+                    mant[k] = mant[k-wds];
+                for (; k >= 0; k--)
+                    mant[k] = 0;
+            }
+            round(mant, 2);
+            put(result+2,mant[0] | sign);
+            put(result+0,mant[1]);
+        } else {
+            if (exponent > 0) {
+                error(ERR_NONFATAL, "overflow in floating-point constant");
+                return 0;
+            } else
+                memset (result, 0, 4); /* underflow to zero */
+        }
+    } else {
+        /*
+         * Zero.
+         */
+        memset (result, 0, 4);
+    }
+    return 1;
+}
+
+/*
+ * Convert the decimal text `str' to a 10-byte extended-precision
+ * constant (1 sign bit, 15 exponent bits biased by 16383, and a
+ * 64-bit mantissa whose top bit is an explicit integer bit) stored
+ * least significant byte first in `result'.
+ *
+ * A negative `sign' sets the sign bit of non-zero results. Values
+ * too small even for a denormal become zero. Returns 1 on success;
+ * on overflow, reports it through `error' and returns 0.
+ */
+static int to_ldoub(char *str, long sign, unsigned char *result,
+                    efunc error) {
+    unsigned short mant[MANT_WORDS];
+    long exponent;
+
+    sign = (sign < 0 ? 0x8000L : 0L);
+
+    flconvert (str, mant, &exponent);
+    if (mant[0] & 0x8000) {
+        /*
+         * Non-zero. flconvert gives a fraction in [0.5,1); from here
+         * on `exponent' is that of the form 1.xxx * 2^exponent.
+         */
+        exponent--;
+        if (exponent >= -16382 && exponent <= 16383) {
+            /*
+             * Normalised. Biased exponents run from 1 to 32766:
+             * 32767 encodes infinity or NaN, and 0 is reserved for
+             * denormals, which are scaled by 2^-16382 just like a
+             * biased exponent of 1.
+             */
+            exponent += 16383;
+            if (round(mant, 4))        /* did we scale up by one? */
+                shr(mant, 1), mant[0] |= 0x8000, exponent++;
+            if (exponent >= 32767) {
+                /* rounding carried us past the largest finite value */
+                error(ERR_NONFATAL, "overflow in floating-point constant");
+                return 0;
+            }
+            put(result+8,exponent | sign);
+            put(result+6,mant[0]);
+            put(result+4,mant[1]);
+            put(result+2,mant[2]);
+            put(result+0,mant[3]);
+        } else if (exponent < -16382 && exponent >= -16445) {
+            /*
+             * Denormal: the 64-bit mantissa is a fixed-point number
+             * scaled by 2^-16382 with its integer bit clear. Shift
+             * the mantissa all the way into place first and then
+             * round. If rounding carries into the integer bit the
+             * value has become the smallest normal number, which
+             * needs a biased exponent of 1.
+             */
+            int shift = -(exponent+16382);
+            int sh = shift % 16, wds = shift / 16;
+            int k;
+
+            shr(mant, sh);
+            if (wds > 0) {
+                for (k = MANT_WORDS-1; k >= wds; k--)
+                    mant[k] = mant[k-wds];
+                for (; k >= 0; k--)
+                    mant[k] = 0;
+            }
+            round(mant, 4);
+            put(result+8,sign | ((mant[0] & 0x8000) ? 1L : 0L));
+            put(result+6,mant[0]);
+            put(result+4,mant[1]);
+            put(result+2,mant[2]);
+            put(result+0,mant[3]);
+        } else {
+            if (exponent > 0) {
+                error(ERR_NONFATAL, "overflow in floating-point constant");
+                return 0;
+            } else
+                memset (result, 0, 10); /* underflow to zero */
+        }
+    } else {
+        /*
+         * Zero.
+         */
+        memset (result, 0, 10);
+    }
+    return 1;
+}
+
+/*
+ * Public entry point: convert the decimal text `number' to a
+ * floating-point constant `bytes' bytes long, written to `result'.
+ * `bytes' selects the converter: 4 (to_float), 8 (to_double) or
+ * 10 (to_ldoub). Returns that converter's result. Any other size is
+ * reported through `error' as ERR_PANIC and yields 0.
+ */
+int float_const (char *number, long sign, unsigned char *result, int bytes,
+                 efunc error) {
+    switch (bytes) {
+      case 4:
+        return to_float (number, sign, result, error);
+      case 8:
+        return to_double (number, sign, result, error);
+      case 10:
+        return to_ldoub (number, sign, result, error);
+      default:
+        error(ERR_PANIC, "strange value %d passed to float_const", bytes);
+        return 0;
+    }
+}
diff --git a/float.h b/float.h
new file mode 100644
index 0000000..cc01ec0
--- /dev/null
+++ b/float.h
@@ -0,0 +1,16 @@
+/* float.h header file for the floating-point constant module of
+ * the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_FLOAT_H
+#define NASM_FLOAT_H
+
+/*
+ * Convert the decimal floating-point text `number' into a constant
+ * `bytes' bytes long (4, 8 or 10), written to `result' least
+ * significant byte first. A negative `sign' produces a negative
+ * constant. Returns 1 on success. On overflow or an unsupported
+ * `bytes' value the problem is reported through `error' and 0 is
+ * returned.
+ */
+int float_const (char *number, long sign, unsigned char *result, int bytes,
+ efunc error);
+
+#endif
diff --git a/insns.dat b/insns.dat
new file mode 100644
index 0000000..f410613
--- /dev/null
+++ b/insns.dat
@@ -0,0 +1,984 @@
+; insns.dat table of instructions for the Netwide Assembler
+;
+; The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+; Julian Hall. All rights reserved. The software is
+; redistributable under the licence given in the file "Licence"
+; distributed in the NASM archive.
+;
+; Format of file: all four fields (mnemonic, operand list, code
+; string, flags) must be present on every functional line. Hence
+; `void' for instructions with no operands, and `\0' as the code
+; string for instructions such as EQU. If the last three fields are
+; all `ignore', no action is taken except to register the opcode as
+; being present. _ALL_ opcodes listed in the `enum' in nasm.h must be
+; present in here, in the same order. This is to build the main
+; instruction table for NASM.
+
+AAA void \1\x37 8086
+AAD void \2\xD5\x0A 8086
+AAD imm \1\xD5\24 8086,UNDOC
+AAM void \2\xD4\x0A 8086
+AAM imm \1\xD4\24 8086,UNDOC
+AAS void \1\x3F 8086
+ADC mem,reg8 \300\1\x10\101 8086,SM
+ADC reg8,reg8 \300\1\x10\101 8086
+ADC mem,reg16 \320\300\1\x11\101 8086,SM
+ADC reg16,reg16 \320\300\1\x11\101 8086
+ADC mem,reg32 \321\300\1\x11\101 386,SM
+ADC reg32,reg32 \321\300\1\x11\101 386
+ADC reg8,mem \301\1\x12\110 8086,SM
+ADC reg8,reg8 \301\1\x12\110 8086
+ADC reg16,mem \320\301\1\x13\110 8086,SM
+ADC reg16,reg16 \320\301\1\x13\110 8086
+ADC reg32,mem \321\301\1\x13\110 386,SM
+ADC reg32,reg32 \321\301\1\x13\110 386
+ADC rm16,imm8 \320\300\1\x83\202\15 8086
+ADC rm32,imm8 \321\300\1\x83\202\15 386
+ADC reg_al,imm \1\x14\21 8086,SM
+ADC reg_ax,imm \320\1\x15\31 8086,SM
+ADC reg_eax,imm \321\1\x15\41 386,SM
+ADC rm8,imm \300\1\x80\202\21 8086,SM
+ADC rm16,imm \320\300\1\x81\202\31 8086,SM
+ADC rm32,imm \321\300\1\x81\202\41 386,SM
+ADC mem,imm8 \300\1\x80\202\21 8086,SM
+ADC mem,imm16 \320\300\1\x81\202\31 8086,SM
+ADC mem,imm32 \321\300\1\x81\202\41 386,SM
+ADD mem,reg8 \300\17\101 8086,SM
+ADD reg8,reg8 \300\17\101 8086
+ADD mem,reg16 \320\300\1\x01\101 8086,SM
+ADD reg16,reg16 \320\300\1\x01\101 8086
+ADD mem,reg32 \321\300\1\x01\101 386,SM
+ADD reg32,reg32 \321\300\1\x01\101 386
+ADD reg8,mem \301\1\x02\110 8086,SM
+ADD reg8,reg8 \301\1\x02\110 8086
+ADD reg16,mem \320\301\1\x03\110 8086,SM
+ADD reg16,reg16 \320\301\1\x03\110 8086
+ADD reg32,mem \321\301\1\x03\110 386,SM
+ADD reg32,reg32 \321\301\1\x03\110 386
+ADD rm16,imm8 \320\300\1\x83\200\15 8086
+ADD rm32,imm8 \321\300\1\x83\200\15 386
+ADD reg_al,imm \1\x04\21 8086,SM
+ADD reg_ax,imm \320\1\x05\31 8086,SM
+ADD reg_eax,imm \321\1\x05\41 386,SM
+ADD rm8,imm \300\1\x80\200\21 8086,SM
+ADD rm16,imm \320\300\1\x81\200\31 8086,SM
+ADD rm32,imm \321\300\1\x81\200\41 386,SM
+ADD mem,imm8 \300\1\x80\200\21 8086,SM
+ADD mem,imm16 \320\300\1\x81\200\31 8086,SM
+ADD mem,imm32 \321\300\1\x81\200\41 386,SM
+AND mem,reg8 \300\1\x20\101 8086,SM
+AND reg8,reg8 \300\1\x20\101 8086
+AND mem,reg16 \320\300\1\x21\101 8086,SM
+AND reg16,reg16 \320\300\1\x21\101 8086
+AND mem,reg32 \321\300\1\x21\101 386,SM
+AND reg32,reg32 \321\300\1\x21\101 386
+AND reg8,mem \301\1\x22\110 8086,SM
+AND reg8,reg8 \301\1\x22\110 8086
+AND reg16,mem \320\301\1\x23\110 8086,SM
+AND reg16,reg16 \320\301\1\x23\110 8086
+AND reg32,mem \321\301\1\x23\110 386,SM
+AND reg32,reg32 \321\301\1\x23\110 386
+AND rm16,imm8 \320\300\1\x83\204\15 8086
+AND rm32,imm8 \321\300\1\x83\204\15 386
+AND reg_al,imm \1\x24\21 8086,SM
+AND reg_ax,imm \320\1\x25\31 8086,SM
+AND reg_eax,imm \321\1\x25\41 386,SM
+AND rm8,imm \300\1\x80\204\21 8086,SM
+AND rm16,imm \320\300\1\x81\204\31 8086,SM
+AND rm32,imm \321\300\1\x81\204\41 386,SM
+AND mem,imm8 \300\1\x80\204\21 8086,SM
+AND mem,imm16 \320\300\1\x81\204\31 8086,SM
+AND mem,imm32 \321\300\1\x81\204\41 386,SM
+ARPL mem,reg16 \300\1\x63\101 286,PRIV,SM
+ARPL reg16,reg16 \300\1\x63\101 286,PRIV
+BOUND reg16,mem \320\301\1\x62\110 186
+BOUND reg32,mem \321\301\1\x62\110 386
+BSF reg16,mem \320\301\2\x0F\xBC\110 386,SM
+BSF reg16,reg16 \320\301\2\x0F\xBC\110 386
+BSF reg32,mem \321\301\2\x0F\xBC\110 386,SM
+BSF reg32,reg32 \321\301\2\x0F\xBC\110 386
+BSR reg16,mem \320\301\2\x0F\xBD\110 386,SM
+BSR reg16,reg16 \320\301\2\x0F\xBD\110 386
+BSR reg32,mem \321\301\2\x0F\xBD\110 386,SM
+BSR reg32,reg32 \321\301\2\x0F\xBD\110 386
+BSWAP reg32 \321\1\x0F\10\xC8 486
+BT mem,reg16 \320\300\2\x0F\xA3\101 386,SM
+BT reg16,reg16 \320\300\2\x0F\xA3\101 386
+BT mem,reg32 \321\300\2\x0F\xA3\101 386,SM
+BT reg32,reg32 \321\300\2\x0F\xA3\101 386
+BT rm16,imm \320\300\2\x0F\xBA\204\25 386
+BT rm32,imm \321\300\2\x0F\xBA\204\25 386
+BTC mem,reg16 \320\300\2\x0F\xBB\101 386,SM
+BTC reg16,reg16 \320\300\2\x0F\xBB\101 386
+BTC mem,reg32 \321\300\2\x0F\xBB\101 386,SM
+BTC reg32,reg32 \321\300\2\x0F\xBB\101 386
+BTC rm16,imm \320\300\2\x0F\xBA\207\25 386
+BTC rm32,imm \321\300\2\x0F\xBA\207\25 386
+BTR mem,reg16 \320\300\2\x0F\xB3\101 386,SM
+BTR reg16,reg16 \320\300\2\x0F\xB3\101 386
+BTR mem,reg32 \321\300\2\x0F\xB3\101 386,SM
+BTR reg32,reg32 \321\300\2\x0F\xB3\101 386
+BTR rm16,imm \320\300\2\x0F\xBA\206\25 386
+BTR rm32,imm \321\300\2\x0F\xBA\206\25 386
+BTS mem,reg16 \320\300\2\x0F\xAB\101 386,SM
+BTS reg16,reg16 \320\300\2\x0F\xAB\101 386
+BTS mem,reg32 \321\300\2\x0F\xAB\101 386,SM
+BTS reg32,reg32 \321\300\2\x0F\xAB\101 386
+BTS rm16,imm \320\300\2\x0F\xBA\205\25 386
+BTS rm32,imm \321\300\2\x0F\xBA\205\25 386
+CALL imm \322\1\xE8\64 8086
+CALL imm|far \322\1\x9A\34\37 8086
+CALL imm:imm \322\1\x9A\35\30 8086
+CALL imm16:imm \320\1\x9A\31\30 8086
+CALL imm:imm16 \320\1\x9A\31\30 8086
+CALL imm32:imm \321\1\x9A\41\30 386
+CALL imm:imm32 \321\1\x9A\41\30 386
+CALL mem|far \322\300\1\xFF\203 8086
+CALL mem16|far \320\300\1\xFF\203 8086
+CALL mem32|far \321\300\1\xFF\203 386
+CALL mem|near \322\300\1\xFF\202 8086
+CALL mem16|near \320\300\1\xFF\202 8086
+CALL mem32|near \321\300\1\xFF\202 386
+CALL reg16 \320\300\1\xFF\202 8086
+CALL reg32 \321\300\1\xFF\202 386
+CALL mem \322\300\1\xFF\202 8086
+CALL mem16 \320\300\1\xFF\202 8086
+CALL mem32 \321\300\1\xFF\202 386
+CBW void \320\1\x98 8086
+CDQ void \321\1\x99 386
+CLC void \1\xF8 8086
+CLD void \1\xFC 8086
+CLI void \1\xFA 8086
+CLTS void \2\x0F\x06 286,PRIV
+CMC void \1\xF5 8086
+CMP mem,reg8 \300\1\x38\101 8086,SM
+CMP reg8,reg8 \300\1\x38\101 8086
+CMP mem,reg16 \320\300\1\x39\101 8086,SM
+CMP reg16,reg16 \320\300\1\x39\101 8086
+CMP mem,reg32 \321\300\1\x39\101 386,SM
+CMP reg32,reg32 \321\300\1\x39\101 386
+CMP reg8,mem \301\1\x3A\110 8086,SM
+CMP reg8,reg8 \301\1\x3A\110 8086
+CMP reg16,mem \320\301\1\x3B\110 8086,SM
+CMP reg16,reg16 \320\301\1\x3B\110 8086
+CMP reg32,mem \321\301\1\x3B\110 386,SM
+CMP reg32,reg32 \321\301\1\x3B\110 386
+CMP rm16,imm8 \320\300\1\x83\207\15 8086
+CMP rm32,imm8 \321\300\1\x83\207\15 386
+CMP reg_al,imm \1\x3C\21 8086,SM
+CMP reg_ax,imm \320\1\x3D\31 8086,SM
+CMP reg_eax,imm \321\1\x3D\41 386,SM
+CMP rm8,imm \300\1\x80\207\21 8086,SM
+CMP rm16,imm \320\300\1\x81\207\31 8086,SM
+CMP rm32,imm \321\300\1\x81\207\41 386,SM
+CMP mem,imm8 \300\1\x80\207\21 8086,SM
+CMP mem,imm16 \320\300\1\x81\207\31 8086,SM
+CMP mem,imm32 \321\300\1\x81\207\41 386,SM
+CMPSB void \1\xA6 8086
+CMPSD void \321\1\xA7 386
+CMPSW void \320\1\xA7 8086
+CMPXCHG mem,reg8 \300\2\x0F\xA6\101 486,SM
+CMPXCHG reg8,reg8 \300\2\x0F\xA6\101 486
+CMPXCHG mem,reg16 \320\300\2\x0F\xA7\101 486,SM
+CMPXCHG reg16,reg16 \320\300\2\x0F\xA7\101 486
+CMPXCHG mem,reg32 \321\300\2\x0F\xA7\101 486,SM
+CMPXCHG reg32,reg32 \321\300\2\x0F\xA7\101 486
+CMPXCHG8B mem \300\2\x0F\xC7\201 PENT
+CPUID void \2\x0F\xA2 PENT
+CWD void \320\1\x99 8086
+CWDE void \321\1\x98 386
+DAA void \1\x27 8086
+DAS void \1\x2F 8086
+DB ignore ignore ignore
+DD ignore ignore ignore
+DEC reg16 \320\10\x48 8086
+DEC reg32 \321\10\x48 386
+DEC rm8 \300\1\xFE\201 8086
+DEC rm16 \320\300\1\xFF\201 8086
+DEC rm32 \321\300\1\xFF\201 386
+DIV rm8 \300\1\xF6\206 8086
+DIV rm16 \320\300\1\xF7\206 8086
+DIV rm32 \321\300\1\xF7\206 386
+DQ ignore ignore ignore
+DT ignore ignore ignore
+DW ignore ignore ignore
+EMMS void \2\x0F\x77 PENT,MMX
+ENTER imm,imm \1\xC8\30\25 186
+EQU imm \0 8086
+EQU imm:imm \0 8086
+F2XM1 void \2\xD9\xF0 8086,FPU
+FABS void \2\xD9\xE1 8086,FPU
+FADD mem32 \300\1\xD8\200 8086,FPU
+FADD mem64 \300\1\xDC\200 8086,FPU
+FADD fpureg|to \1\xDC\10\xC0 8086,FPU
+FADD fpureg \1\xD8\10\xC0 8086,FPU
+FADD fpureg,fpu0 \1\xDC\10\xC0 8086,FPU
+FADD fpu0,fpureg \1\xD8\11\xC0 8086,FPU
+FADDP fpureg \1\xDE\10\xC0 8086,FPU
+FADDP fpureg,fpu0 \1\xDE\10\xC0 8086,FPU
+FBLD mem80 \300\1\xDF\204 8086,FPU
+FBSTP mem80 \300\1\xDF\206 8086,FPU
+FCHS void \2\xD9\xE0 8086,FPU
+FCLEX void \2\xDB\xE2 8086,FPU
+FCMOVB fpureg \1\xDA\10\xC0 P6,FPU
+FCMOVB fpu0,fpureg \1\xDA\11\xC0 P6,FPU
+FCMOVBE fpureg \1\xDA\10\xD0 P6,FPU
+FCMOVBE fpu0,fpureg \1\xDA\11\xD0 P6,FPU
+FCMOVE fpureg \1\xDA\10\xC8 P6,FPU
+FCMOVE fpu0,fpureg \1\xDA\11\xC8 P6,FPU
+FCMOVNB fpureg \1\xDB\10\xC0 P6,FPU
+FCMOVNB fpu0,fpureg \1\xDB\11\xC0 P6,FPU
+FCMOVNBE fpureg \1\xDB\10\xD0 P6,FPU
+FCMOVNBE fpu0,fpureg \1\xDB\11\xD0 P6,FPU
+FCMOVNE fpureg \1\xDB\10\xC8 P6,FPU
+FCMOVNE fpu0,fpureg \1\xDB\11\xC8 P6,FPU
+FCMOVNU fpureg \1\xDB\10\xD8 P6,FPU
+FCMOVNU fpu0,fpureg \1\xDB\11\xD8 P6,FPU
+FCMOVU fpureg \1\xDA\10\xD8 P6,FPU
+FCMOVU fpu0,fpureg \1\xDA\11\xD8 P6,FPU
+FCOM mem32 \300\1\xD8\202 8086,FPU
+FCOM mem64 \300\1\xDC\202 8086,FPU
+FCOM fpureg \1\xD8\10\xD0 8086,FPU
+FCOM fpu0,fpureg \1\xD8\11\xD0 8086,FPU
+FCOMI fpureg \1\xDB\10\xF0 P6,FPU
+FCOMI fpu0,fpureg \1\xDB\11\xF0 P6,FPU
+FCOMIP fpureg \1\xDF\10\xF0 P6,FPU
+FCOMIP fpu0,fpureg \1\xDF\11\xF0 P6,FPU
+FCOMP mem32 \300\1\xD8\203 8086,FPU
+FCOMP mem64 \300\1\xDC\203 8086,FPU
+FCOMP fpureg \1\xD8\10\xD8 8086,FPU
+FCOMP fpu0,fpureg \1\xD8\11\xD8 8086,FPU
+FCOMPP void \2\xDE\xD9 8086,FPU
+FCOS void \2\xD9\xFF 386,FPU
+FDECSTP void \2\xD9\xF6 8086,FPU
+FDISI void \2\xDB\xE1 8086,FPU
+FDIV mem32 \300\1\xD8\206 8086,FPU
+FDIV mem64 \300\1\xDC\206 8086,FPU
+FDIV fpureg|to \1\xDC\10\xF0 8086,FPU
+FDIV fpureg,fpu0 \1\xDC\10\xF0 8086,FPU
+FDIV fpureg \1\xD8\10\xF0 8086,FPU
+FDIV fpu0,fpureg \1\xD8\11\xF0 8086,FPU
+FDIVP fpureg,fpu0 \1\xDE\10\xF0 8086,FPU
+FDIVP fpureg \1\xDE\10\xF0 8086,FPU
+FDIVR mem32 \300\1\xD8\207 8086,FPU
+FDIVR mem64 \300\1\xDC\207 8086,FPU
+FDIVR fpureg|to \1\xDC\10\xF8 8086,FPU
+FDIVR fpureg,fpu0 \1\xDC\10\xF8 8086,FPU
+FDIVR fpureg \1\xD8\10\xF8 8086,FPU
+FDIVR fpu0,fpureg \1\xD8\11\xF8 8086,FPU
+FDIVRP fpureg \1\xDE\10\xF8 8086,FPU
+FDIVRP fpureg,fpu0 \1\xDE\10\xF8 8086,FPU
+FENI void \2\xDB\xE0 8086,FPU
+FFREE fpureg \1\xDD\10\xC0 8086,FPU
+FIADD mem32 \300\1\xDA\200 8086,FPU
+FIADD mem16 \300\1\xDE\200 8086,FPU
+FICOM mem32 \300\1\xDA\202 8086,FPU
+FICOM mem16 \300\1\xDE\202 8086,FPU
+FICOMP mem32 \300\1\xDA\203 8086,FPU
+FICOMP mem16 \300\1\xDE\203 8086,FPU
+FIDIV mem32 \300\1\xDA\206 8086,FPU
+FIDIV mem16 \300\1\xDE\206 8086,FPU
+FIDIVR mem32 \300\1\xDA\207 8086,FPU
+FIDIVR mem16 \300\1\xDE\207 8086,FPU
+FILD mem32 \300\1\xDB\200 8086,FPU
+FILD mem16 \300\1\xDF\200 8086,FPU
+FILD mem64 \300\1\xDF\205 8086,FPU
+FIMUL mem32 \300\1\xDA\201 8086,FPU
+FIMUL mem16 \300\1\xDE\201 8086,FPU
+FINCSTP void \2\xD9\xF7 8086,FPU
+FINIT void \2\xDB\xE3 8086,FPU
+FIST mem32 \300\1\xDB\202 8086,FPU
+FIST mem16 \300\1\xDF\202 8086,FPU
+FISTP mem32 \300\1\xDB\203 8086,FPU
+FISTP mem16 \300\1\xDF\203 8086,FPU
+FISTP mem64 \300\1\xDF\207 8086,FPU
+FISUB mem32 \300\1\xDA\204 8086,FPU
+FISUB mem16 \300\1\xDE\204 8086,FPU
+FISUBR mem32 \300\1\xDA\205 8086,FPU
+FISUBR mem16 \300\1\xDE\205 8086,FPU
+FLD mem32 \300\1\xD9\200 8086,FPU
+FLD mem64 \300\1\xDD\200 8086,FPU
+FLD mem80 \300\1\xDB\205 8086,FPU
+FLD fpureg \1\xD9\10\xC0 8086,FPU
+FLD1 void \2\xD9\xE8 8086,FPU
+FLDCW mem \300\1\xD9\205 8086,FPU
+FLDENV mem \300\1\xD9\204 8086,FPU
+FLDL2E void \2\xD9\xEA 8086,FPU
+FLDL2T void \2\xD9\xE9 8086,FPU
+FLDLG2 void \2\xD9\xEC 8086,FPU
+FLDLN2 void \2\xD9\xED 8086,FPU
+FLDPI void \2\xD9\xEB 8086,FPU
+FLDZ void \2\xD9\xEE 8086,FPU
+FMUL mem32 \300\1\xD8\201 8086,FPU
+FMUL mem64 \300\1\xDC\201 8086,FPU
+FMUL fpureg|to \1\xDC\10\xC8 8086,FPU
+FMUL fpureg,fpu0 \1\xDC\10\xC8 8086,FPU
+FMUL fpureg \1\xD8\10\xC8 8086,FPU
+FMUL fpu0,fpureg \1\xD8\11\xC8 8086,FPU
+FMULP fpureg \1\xDE\10\xC8 8086,FPU
+FMULP fpureg,fpu0 \1\xDE\10\xC8 8086,FPU
+FNOP void \2\xD9\xD0 8086,FPU
+FPATAN void \2\xD9\xF3 8086,FPU
+FPREM void \2\xD9\xF8 8086,FPU
+FPREM1 void \2\xD9\xF5 386,FPU
+FPTAN void \2\xD9\xF2 8086,FPU
+FRNDINT void \2\xD9\xFC 8086,FPU
+FRSTOR mem \300\1\xDD\204 8086,FPU
+FSAVE mem \300\1\xDD\206 8086,FPU
+FSCALE void \2\xD9\xFD 8086,FPU
+FSETPM void \2\xDB\xE4 286,FPU
+FSIN void \2\xD9\xFE 386,FPU
+FSINCOS void \2\xD9\xFB 386,FPU
+FSQRT void \2\xD9\xFA 8086,FPU
+FST mem32 \300\1\xD9\202 8086,FPU
+FST mem64 \300\1\xDD\202 8086,FPU
+FST fpureg \1\xDD\10\xD0 8086,FPU
+FSTCW mem \300\1\xD9\207 8086,FPU
+FSTENV mem \300\1\xD9\206 8086,FPU
+FSTP mem32 \300\1\xD9\203 8086,FPU
+FSTP mem64 \300\1\xDD\203 8086,FPU
+FSTP mem80 \300\1\xDB\207 8086,FPU
+FSTP fpureg \1\xDD\10\xD8 8086,FPU
+FSTSW mem \300\1\xDD\207 8086,FPU
+FSTSW reg_ax \2\xDF\xE0 286,FPU
+FSUB mem32 \300\1\xD8\204 8086,FPU
+FSUB mem64 \300\1\xDC\204 8086,FPU
+FSUB fpureg|to \1\xDC\10\xE0 8086,FPU
+FSUB fpureg,fpu0 \1\xDC\10\xE0 8086,FPU
+FSUB fpureg \1\xD8\10\xE0 8086,FPU
+FSUB fpu0,fpureg \1\xD8\11\xE0 8086,FPU
+FSUBP fpureg \1\xDE\10\xE0 8086,FPU
+FSUBP fpureg,fpu0 \1\xDE\10\xE0 8086,FPU
+FSUBR mem32 \300\1\xD8\205 8086,FPU
+FSUBR mem64 \300\1\xDC\205 8086,FPU
+FSUBR fpureg|to \1\xDC\10\xE8 8086,FPU
+FSUBR fpureg,fpu0 \1\xDC\10\xE8 8086,FPU
+FSUBR fpureg \1\xD8\10\xE8 8086,FPU
+FSUBR fpu0,fpureg \1\xD8\11\xE8 8086,FPU
+FSUBRP fpureg \1\xDE\10\xE8 8086,FPU
+FSUBRP fpureg,fpu0 \1\xDE\10\xE8 8086,FPU
+FTST void \2\xD9\xE4 8086,FPU
+FUCOM fpureg \1\xDD\10\xE0 386,FPU
+FUCOMI fpureg \1\xDB\10\xE8 P6,FPU
+FUCOMI fpu0,fpureg \1\xDB\11\xE8 P6,FPU
+FUCOMIP fpureg \1\xDF\10\xE8 P6,FPU
+FUCOMIP fpu0,fpureg \1\xDF\11\xE8 P6,FPU
+FUCOMP fpureg \1\xDD\10\xE8 386,FPU
+FUCOMPP void \2\xDA\xE9 386,FPU
+FXAM void \2\xD9\xE5 8086,FPU
+FXCH void \2\xD9\xC9 8086,FPU
+FXCH fpureg \1\xD9\10\xC8 8086,FPU
+FXCH fpureg,fpu0 \1\xD9\10\xC8 8086,FPU
+FXCH fpu0,fpureg \1\xD9\11\xC8 8086,FPU
+FXTRACT void \2\xD9\xF4 8086,FPU
+FYL2X void \2\xD9\xF1 8086,FPU
+FYL2XP1 void \2\xD9\xF9 8086,FPU
+HLT void \1\xF4 8086
+ICEBP void \1\xF1 286,UNDOC
+IDIV rm8 \300\1\xF6\207 8086
+IDIV rm16 \320\300\1\xF7\207 8086
+IDIV rm32 \321\300\1\xF7\207 386
+IMUL rm8 \300\1\xF6\205 8086
+IMUL rm16 \320\300\1\xF7\205 8086
+IMUL rm32 \321\300\1\xF7\205 386
+IMUL reg16,mem \320\301\2\x0F\xAF\110 386,SM
+IMUL reg16,reg16 \320\301\2\x0F\xAF\110 386
+IMUL reg32,mem \321\301\2\x0F\xAF\110 386,SM
+IMUL reg32,reg32 \321\301\2\x0F\xAF\110 386
+IMUL reg16,mem,imm8 \320\301\1\x6B\110\16 286,SM
+IMUL reg16,reg16,imm8 \320\301\1\x6B\110\16 286
+IMUL reg16,mem,imm \320\301\1\x69\110\32 286,SM
+IMUL reg16,reg16,imm \320\301\1\x69\110\32 286
+IMUL reg32,mem,imm8 \321\301\1\x6B\110\16 386,SM
+IMUL reg32,reg32,imm8 \321\301\1\x6B\110\16 386
+IMUL reg32,mem,imm \321\301\1\x69\110\42 386,SM
+IMUL reg32,reg32,imm \321\301\1\x69\110\42 386,SM
+IMUL reg16,imm8 \320\1\x6B\100\15 286
+IMUL reg16,imm \320\1\x69\100\31 286,SM
+IMUL reg32,imm8 \321\1\x6B\100\15 386
+IMUL reg32,imm \321\1\x69\100\41 386,SM
+IN reg_al,imm \1\xE4\25 8086
+IN reg_ax,imm \320\1\xE5\25 8086
+IN reg_eax,imm \321\1\xE5\25 386
+IN reg_al,reg_dx \1\xEC 8086
+IN reg_ax,reg_dx \320\1\xED 8086
+IN reg_eax,reg_dx \321\1\xED 386
+INC reg16 \320\10\x40 8086
+INC reg32 \321\10\x40 386
+INC rm8 \300\1\xFE\200 8086
+INC rm16 \320\300\1\xFF\200 8086
+INC rm32 \321\300\1\xFF\200 386
+INSB void \1\x6C 186
+INSD void \321\1\x6D 386
+INSW void \320\1\x6D 186
+INT imm \1\xCD\24 8086
+INT01 void \1\xF1 286,UNDOC
+INT1 void \1\xF1 286,UNDOC
+INT3 void \1\xCC 8086
+INTO void \1\xCE 8086
+INVD void \2\x0F\x08 486
+INVLPG mem \300\2\x0F\x01\207 486
+IRET void \1\xCF 8086
+IRETD void \321\1\xCF 386
+IRETW void \320\1\xCF 8086
+JCXZ imm \320\1\xE3\50 8086
+JECXZ imm \321\1\xE3\50 386
+JMP imm|short \1\xEB\50 8086
+JMP imm \322\1\xE9\64 8086
+JMP imm|far \322\1\xEA\34\37 8086
+JMP imm:imm \322\1\xEA\35\30 8086
+JMP imm16:imm \320\1\xEA\31\30 8086
+JMP imm:imm16 \320\1\xEA\31\30 8086
+JMP imm32:imm \321\1\xEA\41\30 386
+JMP imm:imm32 \321\1\xEA\41\30 386
+JMP mem|far \322\300\1\xFF\205 8086
+JMP mem16|far \320\300\1\xFF\205 8086
+JMP mem32|far \321\300\1\xFF\205 386
+JMP mem|near \322\300\1\xFF\204 8086
+JMP mem16|near \320\300\1\xFF\204 8086
+JMP mem32|near \321\300\1\xFF\204 386
+JMP reg16 \320\300\1\xFF\204 8086
+JMP reg32 \321\300\1\xFF\204 386
+JMP mem \322\300\1\xFF\204 8086
+JMP mem16 \320\300\1\xFF\204 8086
+JMP mem32 \321\300\1\xFF\204 386
+LAHF void \1\x9F 8086
+LAR reg16,mem \320\301\2\x0F\x02\110 286,PRIV,SM
+LAR reg16,reg16 \320\301\2\x0F\x02\110 286,PRIV
+LAR reg32,mem \321\301\2\x0F\x02\110 286,PRIV,SM
+LAR reg32,reg32 \321\301\2\x0F\x02\110 286,PRIV
+LDS reg16,mem \320\301\1\xC5\110 8086
+LDS reg32,mem \321\301\1\xC5\110 8086
+LEA reg16,mem \320\301\1\x8D\110 8086
+LEA reg32,mem \321\301\1\x8D\110 8086
+LEAVE void \1\xC9 186
+LES reg16,mem \320\301\1\xC4\110 8086
+LES reg32,mem \321\301\1\xC4\110 8086
+LFS reg16,mem \320\301\2\x0F\xB4\110 386
+LFS reg32,mem \321\301\2\x0F\xB4\110 386
+LGDT mem \300\2\x0F\x01\202 286,PRIV
+LGS reg16,mem \320\301\2\x0F\xB5\110 386
+LGS reg32,mem \321\301\2\x0F\xB5\110 386
+LIDT mem \300\2\x0F\x01\203 286,PRIV
+LLDT mem \300\1\x0F\17\202 286,PRIV
+LLDT mem16 \300\1\x0F\17\202 286,PRIV
+LLDT reg16 \300\1\x0F\17\202 286,PRIV
+LMSW mem \300\2\x0F\x01\206 286,PRIV
+LMSW mem16 \300\2\x0F\x01\206 286,PRIV
+LMSW reg16 \300\2\x0F\x01\206 286,PRIV
+LOADALL void \2\x0F\x07 386,UNDOC
+LODSB void \1\xAC 8086
+LODSD void \321\1\xAD 386
+LODSW void \320\1\xAD 8086
+LOOP imm \312\1\xE2\50 8086
+LOOP imm,reg_cx \310\1\xE2\50 8086
+LOOP imm,reg_ecx \311\1\xE2\50 386
+LOOPE imm \312\1\xE1\50 8086
+LOOPE imm,reg_cx \310\1\xE1\50 8086
+LOOPE imm,reg_ecx \311\1\xE1\50 386
+LOOPNE imm \312\1\xE0\50 8086
+LOOPNE imm,reg_cx \310\1\xE0\50 8086
+LOOPNE imm,reg_ecx \311\1\xE0\50 386
+LOOPNZ imm \312\1\xE0\50 8086
+LOOPNZ imm,reg_cx \310\1\xE0\50 8086
+LOOPNZ imm,reg_ecx \311\1\xE0\50 386
+LOOPZ imm \312\1\xE1\50 8086
+LOOPZ imm,reg_cx \310\1\xE1\50 8086
+LOOPZ imm,reg_ecx \311\1\xE1\50 386
+LSL reg16,mem \320\301\2\x0F\x03\110 286,PRIV,SM
+LSL reg16,reg16 \320\301\2\x0F\x03\110 286,PRIV
+LSL reg32,mem \321\301\2\x0F\x03\110 286,PRIV,SM
+LSL reg32,reg32 \321\301\2\x0F\x03\110 286,PRIV
+LSS reg16,mem \320\301\2\x0F\xB2\110 386
+LSS reg32,mem \321\301\2\x0F\xB2\110 386
+LTR mem \300\1\x0F\17\203 286,PRIV
+LTR mem16 \300\1\x0F\17\203 286,PRIV
+LTR reg16 \300\1\x0F\17\203 286,PRIV
+MOV mem,reg_cs \300\1\x8C\101 8086,SM
+MOV mem,reg_dess \300\1\x8C\101 8086,SM
+MOV mem,reg_fsgs \300\1\x8C\101 386,SM
+MOV reg16,reg_cs \300\1\x8C\101 8086
+MOV reg16,reg_dess \300\1\x8C\101 8086
+MOV reg16,reg_fsgs \300\1\x8C\101 386
+MOV reg_dess,mem \301\1\x8E\110 8086,SM
+MOV reg_dess,reg16 \301\1\x8E\110 8086
+MOV reg_fsgs,mem \301\1\x8E\110 386,SM
+MOV reg_fsgs,reg16 \301\1\x8E\110 386
+MOV reg_al,mem_offs \301\1\xA0\35 8086,SM
+MOV reg_ax,mem_offs \301\320\1\xA1\35 8086,SM
+MOV reg_eax,mem_offs \301\321\1\xA1\35 386,SM
+MOV mem_offs,reg_al \300\1\xA2\34 8086,SM
+MOV mem_offs,reg_ax \300\320\1\xA3\34 8086,SM
+MOV mem_offs,reg_eax \300\321\1\xA3\34 386,SM
+MOV reg32,reg_cr4 \2\x0F\x20\204 PENT
+MOV reg32,reg_creg \2\x0F\x20\101 386
+MOV reg32,reg_dreg \2\x0F\x21\101 386
+MOV reg32,reg_treg \2\x0F\x24\101 386
+MOV reg_cr4,reg32 \2\x0F\x22\214 PENT
+MOV reg_creg,reg32 \2\x0F\x22\110 386
+MOV reg_dreg,reg32 \2\x0F\x23\110 386
+MOV reg_treg,reg32 \2\x0F\x26\110 386
+MOV mem,reg8 \300\1\x88\101 8086,SM
+MOV reg8,reg8 \300\1\x88\101 8086
+MOV mem,reg16 \320\300\1\x89\101 8086,SM
+MOV reg16,reg16 \320\300\1\x89\101 8086
+MOV mem,reg32 \321\300\1\x89\101 386,SM
+MOV reg32,reg32 \321\300\1\x89\101 386
+MOV reg8,mem \301\1\x8A\110 8086,SM
+MOV reg8,reg8 \301\1\x8A\110 8086
+MOV reg16,mem \320\301\1\x8B\110 8086,SM
+MOV reg16,reg16 \320\301\1\x8B\110 8086
+MOV reg32,mem \321\301\1\x8B\110 386,SM
+MOV reg32,reg32 \321\301\1\x8B\110 386
+MOV reg8,imm \10\xB0\21 8086,SM
+MOV reg16,imm \320\10\xB8\31 8086,SM
+MOV reg32,imm \321\10\xB8\41 386,SM
+MOV rm8,imm \300\1\xC6\200\21 8086,SM
+MOV rm16,imm \320\300\1\xC7\200\31 8086,SM
+MOV rm32,imm \321\300\1\xC7\200\41 386,SM
+MOV mem,imm8 \300\1\xC6\200\21 8086,SM
+MOV mem,imm16 \320\300\1\xC7\200\31 8086,SM
+MOV mem,imm32 \321\300\1\xC7\200\41 386,SM
+MOVD mmxreg,mem \301\2\x0F\x6E\110 PENT,MMX,SD
+MOVD mmxreg,reg32 \2\x0F\x6E\110 PENT,MMX
+MOVD mem,mmxreg \300\2\x0F\x7E\101 PENT,MMX,SD
+MOVD reg32,mmxreg \2\x0F\x7E\101 PENT,MMX
+MOVQ mmxreg,mem \301\2\x0F\x6F\110 PENT,MMX,SM
+MOVQ mmxreg,mmxreg \2\x0F\x6F\110 PENT,MMX
+MOVQ mem,mmxreg \300\2\x0F\x7F\101 PENT,MMX,SM
+MOVQ mmxreg,mmxreg \2\x0F\x7F\101 PENT,MMX
+MOVSB void \1\xA4 8086
+MOVSD void \321\1\xA5 386
+MOVSW void \320\1\xA5 8086
+MOVSX reg16,mem \320\301\2\x0F\xBE\110 386,SB
+MOVSX reg16,reg8 \320\301\2\x0F\xBE\110 386
+MOVSX reg32,rm8 \321\301\2\x0F\xBE\110 386
+MOVSX reg32,rm16 \321\301\2\x0F\xBF\110 386
+MOVZX reg16,mem \320\301\2\x0F\xB6\110 386,SB
+MOVZX reg16,reg8 \320\301\2\x0F\xB6\110 386
+MOVZX reg32,rm8 \321\301\2\x0F\xB6\110 386
+MOVZX reg32,rm16 \321\301\2\x0F\xB7\110 386
+MUL rm8 \300\1\xF6\204 8086
+MUL rm16 \320\300\1\xF7\204 8086
+MUL rm32 \321\300\1\xF7\204 386
+NEG rm8 \300\1\xF6\203 8086
+NEG rm16 \320\300\1\xF7\203 8086
+NEG rm32 \321\300\1\xF7\203 386
+NOP void \1\x90 8086
+NOT rm8 \300\1\xF6\202 8086
+NOT rm16 \320\300\1\xF7\202 8086
+NOT rm32 \321\300\1\xF7\202 386
+OR mem,reg8 \300\1\x08\101 8086,SM
+OR reg8,reg8 \300\1\x08\101 8086
+OR mem,reg16 \320\300\1\x09\101 8086,SM
+OR reg16,reg16 \320\300\1\x09\101 8086
+OR mem,reg32 \321\300\1\x09\101 386,SM
+OR reg32,reg32 \321\300\1\x09\101 386
+OR reg8,mem \301\1\x0A\110 8086,SM
+OR reg8,reg8 \301\1\x0A\110 8086
+OR reg16,mem \320\301\1\x0B\110 8086,SM
+OR reg16,reg16 \320\301\1\x0B\110 8086
+OR reg32,mem \321\301\1\x0B\110 386,SM
+OR reg32,reg32 \321\301\1\x0B\110 386
+OR rm16,imm8 \320\300\1\x83\201\15 8086
+OR rm32,imm8 \321\300\1\x83\201\15 386
+OR reg_al,imm \1\x0C\21 8086,SM
+OR reg_ax,imm \320\1\x0D\31 8086,SM
+OR reg_eax,imm \321\1\x0D\41 386,SM
+OR rm8,imm \300\1\x80\201\21 8086,SM
+OR rm16,imm \320\300\1\x81\201\31 8086,SM
+OR rm32,imm \321\300\1\x81\201\41 386,SM
+OR mem,imm8 \300\1\x80\201\21 8086,SM
+OR mem,imm16 \320\300\1\x81\201\31 8086,SM
+OR mem,imm32 \321\300\1\x81\201\41 386,SM
+OUT imm,reg_al \1\xE6\24 8086
+OUT imm,reg_ax \320\1\xE7\24 8086
+OUT imm,reg_eax \321\1\xE7\24 386
+OUT reg_dx,reg_al \1\xEE 8086
+OUT reg_dx,reg_ax \320\1\xEF 8086
+OUT reg_dx,reg_eax \321\1\xEF 386
+OUTSB void \1\x6E 186
+OUTSD void \321\1\x6F 386
+OUTSW void \320\1\x6F 186
+PACKSSDW mmxreg,mem \301\2\x0F\x6B\110 PENT,MMX,SM
+PACKSSDW mmxreg,mmxreg \2\x0F\x6B\110 PENT,MMX
+PACKSSWB mmxreg,mem \301\2\x0F\x63\110 PENT,MMX,SM
+PACKSSWB mmxreg,mmxreg \2\x0F\x63\110 PENT,MMX
+PACKUSWB mmxreg,mem \301\2\x0F\x67\110 PENT,MMX,SM
+PACKUSWB mmxreg,mmxreg \2\x0F\x67\110 PENT,MMX
+PADDB mmxreg,mem \301\2\x0F\xFC\110 PENT,MMX,SM
+PADDB mmxreg,mmxreg \2\x0F\xFC\110 PENT,MMX
+PADDD mmxreg,mem \301\2\x0F\xFE\110 PENT,MMX,SM
+PADDD mmxreg,mmxreg \2\x0F\xFE\110 PENT,MMX
+PADDSB mmxreg,mem \301\2\x0F\xEC\110 PENT,MMX,SM
+PADDSB mmxreg,mmxreg \2\x0F\xEC\110 PENT,MMX
+PADDSW mmxreg,mem \301\2\x0F\xED\110 PENT,MMX,SM
+PADDSW mmxreg,mmxreg \2\x0F\xED\110 PENT,MMX
+PADDUSB mmxreg,mem \301\2\x0F\xDC\110 PENT,MMX,SM
+PADDUSB mmxreg,mmxreg \2\x0F\xDC\110 PENT,MMX
+PADDUSW mmxreg,mem \301\2\x0F\xDD\110 PENT,MMX,SM
+PADDUSW mmxreg,mmxreg \2\x0F\xDD\110 PENT,MMX
+PADDW mmxreg,mem \301\2\x0F\xFD\110 PENT,MMX,SM
+PADDW mmxreg,mmxreg \2\x0F\xFD\110 PENT,MMX
+PAND mmxreg,mem \301\2\x0F\xDB\110 PENT,MMX,SM
+PAND mmxreg,mmxreg \2\x0F\xDB\110 PENT,MMX
+PANDN mmxreg,mem \301\2\x0F\xDF\110 PENT,MMX,SM
+PANDN mmxreg,mmxreg \2\x0F\xDF\110 PENT,MMX
+PCMPEQB mmxreg,mem \301\2\x0F\x74\110 PENT,MMX,SM
+PCMPEQB mmxreg,mmxreg \2\x0F\x74\110 PENT,MMX
+PCMPEQD mmxreg,mem \301\2\x0F\x76\110 PENT,MMX,SM
+PCMPEQD mmxreg,mmxreg \2\x0F\x76\110 PENT,MMX
+PCMPEQW mmxreg,mem \301\2\x0F\x75\110 PENT,MMX,SM
+PCMPEQW mmxreg,mmxreg \2\x0F\x75\110 PENT,MMX
+PCMPGTB mmxreg,mem \301\2\x0F\x64\110 PENT,MMX,SM
+PCMPGTB mmxreg,mmxreg \2\x0F\x64\110 PENT,MMX
+PCMPGTD mmxreg,mem \301\2\x0F\x66\110 PENT,MMX,SM
+PCMPGTD mmxreg,mmxreg \2\x0F\x66\110 PENT,MMX
+PCMPGTW mmxreg,mem \301\2\x0F\x65\110 PENT,MMX,SM
+PCMPGTW mmxreg,mmxreg \2\x0F\x65\110 PENT,MMX
+PMADDWD mmxreg,mem \301\2\x0F\xF5\110 PENT,MMX,SM
+PMADDWD mmxreg,mmxreg \2\x0F\xF5\110 PENT,MMX
+PMULHW mmxreg,mem \301\2\x0F\xE5\110 PENT,MMX,SM
+PMULHW mmxreg,mmxreg \2\x0F\xE5\110 PENT,MMX
+PMULLW mmxreg,mem \301\2\x0F\xD5\110 PENT,MMX,SM
+PMULLW mmxreg,mmxreg \2\x0F\xD5\110 PENT,MMX
+POP mem16 \320\300\1\x8F\200 8086
+POP mem32 \321\300\1\x8F\200 386
+POP reg_dess \4 8086
+POP reg_fsgs \1\x0F\5 386
+POP reg16 \320\10\x58 8086
+POP reg32 \321\10\x58 386
+POPA void \1\x61 186
+POPAD void \321\1\x61 386
+POPAW void \320\1\x61 186
+POPF void \1\x9D 8086
+POPFD void \321\1\x9D 386
+POPFW void \320\1\x9D 8086
+POR mmxreg,mem \301\2\x0F\xEB\110 PENT,MMX,SM
+POR mmxreg,mmxreg \2\x0F\xEB\110 PENT,MMX
+PSLLD mmxreg,mem \301\2\x0F\xF2\110 PENT,MMX,SM
+PSLLD mmxreg,mmxreg \2\x0F\xF2\110 PENT,MMX
+PSLLD mmxreg,imm \2\x0F\x72\206\25 PENT,MMX
+PSLLQ mmxreg,mem \301\2\x0F\xF3\110 PENT,MMX,SM
+PSLLQ mmxreg,mmxreg \2\x0F\xF3\110 PENT,MMX
+PSLLQ mmxreg,imm \2\x0F\x73\206\25 PENT,MMX
+PSLLW mmxreg,mem \301\2\x0F\xF1\110 PENT,MMX,SM
+PSLLW mmxreg,mmxreg \2\x0F\xF1\110 PENT,MMX
+PSLLW mmxreg,imm \2\x0F\x71\206\25 PENT,MMX
+PSRAD mmxreg,mem \301\2\x0F\xE2\110 PENT,MMX,SM
+PSRAD mmxreg,mmxreg \2\x0F\xE2\110 PENT,MMX
+PSRAD mmxreg,imm \2\x0F\x72\204\25 PENT,MMX
+PSRAW mmxreg,mem \301\2\x0F\xE1\110 PENT,MMX,SM
+PSRAW mmxreg,mmxreg \2\x0F\xE1\110 PENT,MMX
+PSRAW mmxreg,imm \2\x0F\x71\204\25 PENT,MMX
+PSRLD mmxreg,mem \301\2\x0F\xD2\110 PENT,MMX,SM
+PSRLD mmxreg,mmxreg \2\x0F\xD2\110 PENT,MMX
+PSRLD mmxreg,imm \2\x0F\x72\202\25 PENT,MMX
+PSRLQ mmxreg,mem \301\2\x0F\xD3\110 PENT,MMX,SM
+PSRLQ mmxreg,mmxreg \2\x0F\xD3\110 PENT,MMX
+PSRLQ mmxreg,imm \2\x0F\x73\202\25 PENT,MMX
+PSRLW mmxreg,mem \301\2\x0F\xD1\110 PENT,MMX,SM
+PSRLW mmxreg,mmxreg \2\x0F\xD1\110 PENT,MMX
+PSRLW mmxreg,imm \2\x0F\x71\202\25 PENT,MMX
+PSUBB mmxreg,mem \301\2\x0F\xF8\110 PENT,MMX,SM
+PSUBB mmxreg,mmxreg \2\x0F\xF8\110 PENT,MMX
+PSUBD mmxreg,mem \301\2\x0F\xFA\110 PENT,MMX,SM
+PSUBD mmxreg,mmxreg \2\x0F\xFA\110 PENT,MMX
+PSUBSB mmxreg,mem \301\2\x0F\xE8\110 PENT,MMX,SM
+PSUBSB mmxreg,mmxreg \2\x0F\xE8\110 PENT,MMX
+PSUBSW mmxreg,mem \301\2\x0F\xE9\110 PENT,MMX,SM
+PSUBSW mmxreg,mmxreg \2\x0F\xE9\110 PENT,MMX
+PSUBUSB mmxreg,mem \301\2\x0F\xD8\110 PENT,MMX,SM
+PSUBUSB mmxreg,mmxreg \2\x0F\xD8\110 PENT,MMX
+PSUBUSW mmxreg,mem \301\2\x0F\xD9\110 PENT,MMX,SM
+PSUBUSW mmxreg,mmxreg \2\x0F\xD9\110 PENT,MMX
+PSUBW mmxreg,mem \301\2\x0F\xF9\110 PENT,MMX,SM
+PSUBW mmxreg,mmxreg \2\x0F\xF9\110 PENT,MMX
+PUNPCKHBW mmxreg,mem \301\2\x0F\x68\110 PENT,MMX,SM
+PUNPCKHBW mmxreg,mmxreg \2\x0F\x68\110 PENT,MMX
+PUNPCKHDQ mmxreg,mem \301\2\x0F\x6A\110 PENT,MMX,SM
+PUNPCKHDQ mmxreg,mmxreg \2\x0F\x6A\110 PENT,MMX
+PUNPCKHWD mmxreg,mem \301\2\x0F\x69\110 PENT,MMX,SM
+PUNPCKHWD mmxreg,mmxreg \2\x0F\x69\110 PENT,MMX
+PUNPCKLBW mmxreg,mem \301\2\x0F\x60\110 PENT,MMX,SM
+PUNPCKLBW mmxreg,mmxreg \2\x0F\x60\110 PENT,MMX
+PUNPCKLDQ mmxreg,mem \301\2\x0F\x62\110 PENT,MMX,SM
+PUNPCKLDQ mmxreg,mmxreg \2\x0F\x62\110 PENT,MMX
+PUNPCKLWD mmxreg,mem \301\2\x0F\x61\110 PENT,MMX,SM
+PUNPCKLWD mmxreg,mmxreg \2\x0F\x61\110 PENT,MMX
+PUSH mem16 \320\300\1\xFF\206 8086
+PUSH mem32 \321\300\1\xFF\206 386
+PUSH reg_fsgs \1\x0F\7 386
+PUSH reg_sreg \6 8086
+PUSH reg16 \320\10\x50 8086
+PUSH reg32 \321\10\x50 386
+PUSH imm8 \1\x6A\14 286
+PUSH imm16 \320\1\x68\30 286
+PUSH imm32 \321\1\x68\40 386
+PUSHA void \1\x60 186
+PUSHAD void \321\1\x60 386
+PUSHAW void \320\1\x60 186
+PUSHF void \1\x9C 8086
+PUSHFD void \321\1\x9C 386
+PUSHFW void \320\1\x9C 8086
+PXOR mmxreg,mem \301\2\x0F\xEF\110 PENT,MMX,SM
+PXOR mmxreg,mmxreg \2\x0F\xEF\110 PENT,MMX
+RCL rm8,unity \300\1\xD0\202 8086
+RCL rm8,reg_cl \300\1\xD2\202 8086
+RCL rm8,imm \300\1\xC0\202\25 286
+RCL rm16,unity \320\300\1\xD1\202 8086
+RCL rm16,reg_cl \320\300\1\xD3\202 8086
+RCL rm16,imm \320\300\1\xC1\202\25 286
+RCL rm32,unity \321\300\1\xD1\202 386
+RCL rm32,reg_cl \321\300\1\xD3\202 386
+RCL rm32,imm \321\300\1\xC1\202\25 386
+RCR rm8,unity \300\1\xD0\203 8086
+RCR rm8,reg_cl \300\1\xD2\203 8086
+RCR rm8,imm \300\1\xC0\203\25 286
+RCR rm16,unity \320\300\1\xD1\203 8086
+RCR rm16,reg_cl \320\300\1\xD3\203 8086
+RCR rm16,imm \320\300\1\xC1\203\25 286
+RCR rm32,unity \321\300\1\xD1\203 386
+RCR rm32,reg_cl \321\300\1\xD3\203 386
+RCR rm32,imm \321\300\1\xC1\203\25 386
+RDMSR void \2\x0F\x32 PENT
+RDPMC void \2\x0F\x33 P6
+RDTSC void \2\x0F\x31 PENT
+RESB imm \340 8086
+RESD ignore ignore ignore
+RESQ ignore ignore ignore
+REST ignore ignore ignore
+RESW ignore ignore ignore
+RET void \1\xC3 8086
+RET imm \1\xC2\30 8086
+RETF void \1\xCB 8086
+RETF imm \1\xCA\30 8086
+RETN void \1\xC3 8086
+RETN imm \1\xC2\30 8086
+ROL rm8,unity \300\1\xD0\200 8086
+ROL rm8,reg_cl \300\1\xD2\200 8086
+ROL rm8,imm \300\1\xC0\200\25 286
+ROL rm16,unity \320\300\1\xD1\200 8086
+ROL rm16,reg_cl \320\300\1\xD3\200 8086
+ROL rm16,imm \320\300\1\xC1\200\25 286
+ROL rm32,unity \321\300\1\xD1\200 386
+ROL rm32,reg_cl \321\300\1\xD3\200 386
+ROL rm32,imm \321\300\1\xC1\200\25 386
+ROR rm8,unity \300\1\xD0\201 8086
+ROR rm8,reg_cl \300\1\xD2\201 8086
+ROR rm8,imm \300\1\xC0\201\25 286
+ROR rm16,unity \320\300\1\xD1\201 8086
+ROR rm16,reg_cl \320\300\1\xD3\201 8086
+ROR rm16,imm \320\300\1\xC1\201\25 286
+ROR rm32,unity \321\300\1\xD1\201 386
+ROR rm32,reg_cl \321\300\1\xD3\201 386
+ROR rm32,imm \321\300\1\xC1\201\25 386
+RSM void \2\x0F\xAA PENT
+SAHF void \1\x9E 8086
+SAL rm8,unity \300\1\xD0\204 8086,ND
+SAL rm8,reg_cl \300\1\xD2\204 8086,ND
+SAL rm8,imm \300\1\xC0\204\25 286,ND
+SAL rm16,unity \320\300\1\xD1\204 8086,ND
+SAL rm16,reg_cl \320\300\1\xD3\204 8086,ND
+SAL rm16,imm \320\300\1\xC1\204\25 286,ND
+SAL rm32,unity \321\300\1\xD1\204 386,ND
+SAL rm32,reg_cl \321\300\1\xD3\204 386,ND
+SAL rm32,imm \321\300\1\xC1\204\25 386,ND
+SALC void \1\xD6 8086,UNDOC
+SAR rm8,unity \300\1\xD0\207 8086
+SAR rm8,reg_cl \300\1\xD2\207 8086
+SAR rm8,imm \300\1\xC0\207\25 286
+SAR rm16,unity \320\300\1\xD1\207 8086
+SAR rm16,reg_cl \320\300\1\xD3\207 8086
+SAR rm16,imm \320\300\1\xC1\207\25 286
+SAR rm32,unity \321\300\1\xD1\207 386
+SAR rm32,reg_cl \321\300\1\xD3\207 386
+SAR rm32,imm \321\300\1\xC1\207\25 386
+SBB mem,reg8 \300\1\x18\101 8086,SM
+SBB reg8,reg8 \300\1\x18\101 8086
+SBB mem,reg16 \320\300\1\x19\101 8086,SM
+SBB reg16,reg16 \320\300\1\x19\101 8086
+SBB mem,reg32 \321\300\1\x19\101 386,SM
+SBB reg32,reg32 \321\300\1\x19\101 386
+SBB reg8,mem \301\1\x1A\110 8086,SM
+SBB reg8,reg8 \301\1\x1A\110 8086
+SBB reg16,mem \320\301\1\x1B\110 8086,SM
+SBB reg16,reg16 \320\301\1\x1B\110 8086
+SBB reg32,mem \321\301\1\x1B\110 386,SM
+SBB reg32,reg32 \321\301\1\x1B\110 386
+SBB rm16,imm8 \320\300\1\x83\203\15 8086
+SBB rm32,imm8 \321\300\1\x83\203\15 386
+SBB reg_al,imm \1\x1C\21 8086,SM
+SBB reg_ax,imm \320\1\x1D\31 8086,SM
+SBB reg_eax,imm \321\1\x1D\41 386,SM
+SBB rm8,imm \300\1\x80\203\21 8086,SM
+SBB rm16,imm \320\300\1\x81\203\31 8086,SM
+SBB rm32,imm \321\300\1\x81\203\41 386,SM
+SBB mem,imm8 \300\1\x80\203\21 8086,SM
+SBB mem,imm16 \320\300\1\x81\203\31 8086,SM
+SBB mem,imm32 \321\300\1\x81\203\41 386,SM
+SCASB void \1\xAE 8086
+SCASD void \321\1\xAF 386
+SCASW void \320\1\xAF 8086
+SGDT mem \300\2\x0F\x01\200 286,PRIV
+SHL rm8,unity \300\1\xD0\204 8086
+SHL rm8,reg_cl \300\1\xD2\204 8086
+SHL rm8,imm \300\1\xC0\204\25 286
+SHL rm16,unity \320\300\1\xD1\204 8086
+SHL rm16,reg_cl \320\300\1\xD3\204 8086
+SHL rm16,imm \320\300\1\xC1\204\25 286
+SHL rm32,unity \321\300\1\xD1\204 386
+SHL rm32,reg_cl \321\300\1\xD3\204 386
+SHL rm32,imm \321\300\1\xC1\204\25 386
+SHLD mem,reg16,imm \300\320\2\x0F\xA4\101\26 386,SM2
+SHLD reg16,reg16,imm \300\320\2\x0F\xA4\101\26 386,SM2
+SHLD mem,reg32,imm \300\321\2\x0F\xA4\101\26 386,SM2
+SHLD reg32,reg32,imm \300\321\2\x0F\xA4\101\26 386,SM2
+SHLD mem,reg16,reg_cl \300\320\2\x0F\xA5\101 386,SM
+SHLD reg16,reg16,reg_cl \300\320\2\x0F\xA5\101 386
+SHLD mem,reg32,reg_cl \300\321\2\x0F\xA5\101 386,SM
+SHLD reg32,reg32,reg_cl \300\321\2\x0F\xA5\101 386
+SHR rm8,unity \300\1\xD0\205 8086
+SHR rm8,reg_cl \300\1\xD2\205 8086
+SHR rm8,imm \300\1\xC0\205\25 286
+SHR rm16,unity \320\300\1\xD1\205 8086
+SHR rm16,reg_cl \320\300\1\xD3\205 8086
+SHR rm16,imm \320\300\1\xC1\205\25 286
+SHR rm32,unity \321\300\1\xD1\205 386
+SHR rm32,reg_cl \321\300\1\xD3\205 386
+SHR rm32,imm \321\300\1\xC1\205\25 386
+SHRD mem,reg16,imm \300\320\2\x0F\xAC\101\26 386,SM2
+SHRD reg16,reg16,imm \300\320\2\x0F\xAC\101\26 386,SM2
+SHRD mem,reg32,imm \300\321\2\x0F\xAC\101\26 386,SM2
+SHRD reg32,reg32,imm \300\321\2\x0F\xAC\101\26 386,SM2
+SHRD mem,reg16,reg_cl \300\320\2\x0F\xAD\101 386,SM
+SHRD reg16,reg16,reg_cl \300\320\2\x0F\xAD\101 386
+SHRD mem,reg32,reg_cl \300\321\2\x0F\xAD\101 386,SM
+SHRD reg32,reg32,reg_cl \300\321\2\x0F\xAD\101 386
+SIDT mem \300\2\x0F\x01\201 286,PRIV
+SLDT mem \300\1\x0F\17\200 286,PRIV
+SLDT mem16 \300\1\x0F\17\200 286,PRIV
+SLDT reg16 \300\1\x0F\17\200 286,PRIV
+SMSW mem \300\2\x0F\x01\204 286,PRIV
+SMSW reg16 \300\2\x0F\x01\204 286,PRIV
+STC void \1\xF9 8086
+STD void \1\xFD 8086
+STI void \1\xFB 8086
+STOSB void \1\xAA 8086
+STOSD void \321\1\xAB 386
+STOSW void \320\1\xAB 8086
+STR mem \300\1\x0F\17\201 286,PRIV
+STR mem16 \300\1\x0F\17\201 286,PRIV
+STR reg16 \300\1\x0F\17\201 286,PRIV
+SUB mem,reg8 \300\1\x28\101 8086,SM
+SUB reg8,reg8 \300\1\x28\101 8086
+SUB mem,reg16 \320\300\1\x29\101 8086,SM
+SUB reg16,reg16 \320\300\1\x29\101 8086
+SUB mem,reg32 \321\300\1\x29\101 386,SM
+SUB reg32,reg32 \321\300\1\x29\101 386
+SUB reg8,mem \301\1\x2A\110 8086,SM
+SUB reg8,reg8 \301\1\x2A\110 8086
+SUB reg16,mem \320\301\1\x2B\110 8086,SM
+SUB reg16,reg16 \320\301\1\x2B\110 8086
+SUB reg32,mem \321\301\1\x2B\110 386,SM
+SUB reg32,reg32 \321\301\1\x2B\110 386
+SUB rm16,imm8 \320\300\1\x83\205\15 8086
+SUB rm32,imm8 \321\300\1\x83\205\15 386
+SUB reg_al,imm \1\x2C\21 8086,SM
+SUB reg_ax,imm \320\1\x2D\31 8086,SM
+SUB reg_eax,imm \321\1\x2D\41 386,SM
+SUB rm8,imm \300\1\x80\205\21 8086,SM
+SUB rm16,imm \320\300\1\x81\205\31 8086,SM
+SUB rm32,imm \321\300\1\x81\205\41 386,SM
+SUB mem,imm8 \300\1\x80\205\21 8086,SM
+SUB mem,imm16 \320\300\1\x81\205\31 8086,SM
+SUB mem,imm32 \321\300\1\x81\205\41 386,SM
+TEST mem,reg8 \300\1\x84\101 8086,SM
+TEST reg8,reg8 \300\1\x84\101 8086
+TEST mem,reg16 \320\300\1\x85\101 8086,SM
+TEST reg16,reg16 \320\300\1\x85\101 8086
+TEST mem,reg32 \321\300\1\x85\101 386,SM
+TEST reg32,reg32 \321\300\1\x85\101 386
+TEST reg_al,imm \1\xA8\21 8086,SM
+TEST reg_ax,imm \320\1\xA9\31 8086,SM
+TEST reg_eax,imm \321\1\xA9\41 386,SM
+TEST rm8,imm \300\1\xF6\200\21 8086,SM
+TEST rm16,imm \320\300\1\xF7\200\31 8086,SM
+TEST rm32,imm \321\300\1\xF7\200\41 386,SM
+TEST mem,imm8 \300\1\xF6\200\21 8086,SM
+TEST mem,imm16 \320\300\1\xF7\200\31 8086,SM
+TEST mem,imm32 \321\300\1\xF7\200\41 386,SM
+UMOV mem,reg8 \300\2\x0F\x10\101 386,UNDOC,SM
+UMOV reg8,reg8 \300\2\x0F\x10\101 386,UNDOC
+UMOV mem,reg16 \320\300\2\x0F\x11\101 386,UNDOC,SM
+UMOV reg16,reg16 \320\300\2\x0F\x11\101 386,UNDOC
+UMOV mem,reg32 \321\300\2\x0F\x11\101 386,UNDOC,SM
+UMOV reg32,reg32 \321\300\2\x0F\x11\101 386,UNDOC
+UMOV reg8,mem \301\2\x0F\x12\110 386,UNDOC,SM
+UMOV reg8,reg8 \301\2\x0F\x12\110 386,UNDOC
+UMOV reg16,mem \320\301\2\x0F\x13\110 386,UNDOC,SM
+UMOV reg16,reg16 \320\301\2\x0F\x13\110 386,UNDOC
+UMOV reg32,mem \321\301\2\x0F\x13\110 386,UNDOC,SM
+UMOV reg32,reg32 \321\301\2\x0F\x13\110 386,UNDOC
+VERR mem \300\1\x0F\17\204 286,PRIV
+VERR mem16 \300\1\x0F\17\204 286,PRIV
+VERR reg16 \300\1\x0F\17\204 286,PRIV
+VERW mem \300\1\x0F\17\205 286,PRIV
+VERW mem16 \300\1\x0F\17\205 286,PRIV
+VERW reg16 \300\1\x0F\17\205 286,PRIV
+WAIT void \1\x9B 8086
+WBINVD void \2\x0F\x09 486
+WRMSR void \2\x0F\x30 PENT
+XADD mem,reg8 \300\2\x0F\xC0\101 486,SM
+XADD reg8,reg8 \300\2\x0F\xC0\101 486
+XADD mem,reg16 \320\300\2\x0F\xC1\101 486,SM
+XADD reg16,reg16 \320\300\2\x0F\xC1\101 486
+XADD mem,reg32 \321\300\2\x0F\xC1\101 486,SM
+XADD reg32,reg32 \321\300\2\x0F\xC1\101 486
+XCHG reg_ax,reg16 \320\11\x90 8086
+XCHG reg_eax,reg32 \321\11\x90 386
+XCHG reg16,reg_ax \320\10\x90 8086
+XCHG reg32,reg_eax \321\10\x90 386
+XCHG reg8,mem \301\1\x86\110 8086,SM
+XCHG reg8,reg8 \301\1\x86\110 8086
+XCHG reg16,mem \320\301\1\x87\110 8086,SM
+XCHG reg16,reg16 \320\301\1\x87\110 8086
+XCHG reg32,mem \321\301\1\x87\110 386,SM
+XCHG reg32,reg32 \321\301\1\x87\110 386
+XCHG mem,reg8 \300\1\x86\101 8086,SM
+XCHG reg8,reg8 \300\1\x86\101 8086
+XCHG mem,reg16 \320\300\1\x87\101 8086,SM
+XCHG reg16,reg16 \320\300\1\x87\101 8086
+XCHG mem,reg32 \321\300\1\x87\101 386,SM
+XCHG reg32,reg32 \321\300\1\x87\101 386
+XLATB void \1\xD7 8086
+XOR mem,reg8 \300\1\x30\101 8086,SM
+XOR reg8,reg8 \300\1\x30\101 8086
+XOR mem,reg16 \320\300\1\x31\101 8086,SM
+XOR reg16,reg16 \320\300\1\x31\101 8086
+XOR mem,reg32 \321\300\1\x31\101 386,SM
+XOR reg32,reg32 \321\300\1\x31\101 386
+XOR reg8,mem \301\1\x32\110 8086,SM
+XOR reg8,reg8 \301\1\x32\110 8086
+XOR reg16,mem \320\301\1\x33\110 8086,SM
+XOR reg16,reg16 \320\301\1\x33\110 8086
+XOR reg32,mem \321\301\1\x33\110 386,SM
+XOR reg32,reg32 \321\301\1\x33\110 386
+XOR rm16,imm8 \320\300\1\x83\206\15 8086
+XOR rm32,imm8 \321\300\1\x83\206\15 386
+XOR reg_al,imm \1\x34\21 8086,SM
+XOR reg_ax,imm \320\1\x35\31 8086,SM
+XOR reg_eax,imm \321\1\x35\41 386,SM
+XOR rm8,imm \300\1\x80\206\21 8086,SM
+XOR rm16,imm \320\300\1\x81\206\31 8086,SM
+XOR rm32,imm \321\300\1\x81\206\41 386,SM
+XOR mem,imm8 \300\1\x80\206\21 8086,SM
+XOR mem,imm16 \320\300\1\x81\206\31 8086,SM
+XOR mem,imm32 \321\300\1\x81\206\41 386,SM
+CMOVcc reg16,mem \320\301\1\x0F\330\x40\110 P6,SM
+CMOVcc reg16,reg16 \320\301\1\x0F\330\x40\110 P6
+CMOVcc reg32,mem \321\301\1\x0F\330\x40\110 P6,SM
+CMOVcc reg32,reg32 \321\301\1\x0F\330\x40\110 P6
+Jcc imm|near \322\1\x0F\330\x80\64 386
+Jcc imm \330\x70\50 8086
+Jcc imm|short \330\x70\50 8086
+SETcc mem \300\1\x0F\330\x90\200 386,SB
+SETcc reg8 \300\1\x0F\330\x90\200 386
diff --git a/insns.h b/insns.h
new file mode 100644
index 0000000..c42790d
--- /dev/null
+++ b/insns.h
@@ -0,0 +1,66 @@
+/* insns.h header file for insnsa.c and insnsd.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_INSNS_H
+#define NASM_INSNS_H
+
+/*
+ * One instruction template. insns.pl generates arrays of these from
+ * insns.dat, one initialiser per usable line of that file, and ends
+ * each array with an entry whose opcode is -1.
+ */
+struct itemplate {
+ int opcode; /* the token, passed from "parser.c" */
+ /* (an I_xxx value; -1 ends a table) */
+ int operands; /* number of operands */
+ long opd[3]; /* bit flags for operand types */
+ /* (entries for absent operands are 0) */
+ char *code; /* the code it assembles to */
+ /* (the code string from insns.dat) */
+ int flags; /* some flags */
+ /* (the IF_xxx values below, OR'd together) */
+};
+
+/*
+ * Instruction template flags. These specify which processor
+ * targets the instruction is eligible for, whether it is
+ * privileged or undocumented, and also specify extra error
+ * checking on the matching of the instruction.
+ *
+ * IF_SM stands for Size Match: any operand whose size is not
+ * explicitly specified by the template is `really' intended to be
+ * the same size as the first size-specified operand.
+ * Non-specification is tolerated in the input instruction, but
+ * _wrong_ specification is not.
+ *
+ * IF_SM2 invokes Size Match on only the first _two_ operands, for
+ * three-operand instructions such as SHLD: it implies that the
+ * first two operands must match in size, but that the third is
+ * required to be _unspecified_.
+ *
+ * IF_SB invokes Size Byte: operands with unspecified size in the
+ * template are really bytes, and so no non-byte specification in
+ * the input instruction will be tolerated.
+ *
+ * IF_SD similarly invokes Size Doubleword.
+ *
+ * (The default state if neither IF_SM nor IF_SM2 is specified is
+ * that any operand with unspecified size in the template is
+ * required to have unspecified size in the instruction too...)
+ */
+
+/* Operand-size checking flags: independent bits, described above. */
+#define IF_SM 0x0001 /* size match */
+#define IF_SM2 0x0002 /* size match first two operands */
+#define IF_SB 0x0004 /* unsized operands can't be non-byte */
+#define IF_SD 0x0008 /* unsized operands can't be nondword */
+/*
+ * Processor level. NOTE: these are consecutive _values_ stored in
+ * the IF_PMASK field, not independent bits (IF_8086 is zero, and
+ * IF_386 is numerically IF_186|IF_286), so a level cannot be tested
+ * with a plain `flags & IF_xxx'; presumably users compare
+ * `flags & IF_PMASK' against the level instead.
+ */
+#define IF_8086 0x0000 /* 8086 instruction */
+#define IF_186 0x0010 /* 186+ instruction */
+#define IF_286 0x0020 /* 286+ instruction */
+#define IF_386 0x0030 /* 386+ instruction */
+#define IF_486 0x0040 /* 486+ instruction */
+#define IF_PENT 0x0050 /* Pentium instruction */
+#define IF_P6 0x0060 /* P6 instruction */
+#define IF_PMASK 0x00F0 /* the mask for processor types */
+/* Instruction-class flags: independent bits. */
+#define IF_PRIV 0x0100 /* it's a privileged instruction */
+#define IF_UNDOC 0x0200 /* it's an undocumented instruction */
+#define IF_FPU 0x0400 /* it's an FPU instruction */
+#define IF_MMX 0x0800 /* it's an MMX instruction */
+#define IF_ND 0x1000 /* ignore this in the disassembler */
+
+#endif
diff --git a/insns.pl b/insns.pl
new file mode 100644
index 0000000..275a66b
--- /dev/null
+++ b/insns.pl
@@ -0,0 +1,160 @@
+#!/usr/bin/perl
+#
+# insns.pl produce insnsa.c and insnsd.c from insns.dat
+#
+# The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+# Julian Hall. All rights reserved. The software is
+# redistributable under the licence given in the file "Licence"
+# distributed in the NASM archive.
+
+print STDERR "Reading insns.dat...\n";
+
+open (F, "insns.dat") || die "unable to open insns.dat";
+
+# Tables built while reading insns.dat:
+#   @opcodes - each distinct mnemonic, in order of first appearance
+#   @aa_XXX  - (symbolic array) formatted templates for mnemonic XXX
+#   @big     - every template the disassembler may use (no IF_ND ones)
+#   @dd_HH   - (symbolic array) indices into @big of the templates
+#              whose encoding can start with byte 0xHH
+$line = 0;
+$opcodes = 0;
+$insns = 0;
+while (<F>) {
+    $line++;
+    next if /^\s*;/;                  # comments
+    chomp;
+    # Split explicitly into @_: the implicit "split to @_" performed by
+    # a bare `split;' no longer exists in Perl 5.12 and later, where it
+    # would leave @_ empty and silently skip every line.
+    @_ = split;
+    next if $#_ == -1;                # blank lines
+    (warn "line $line does not contain four fields\n"), next if $#_ != 3;
+    $formatted = &format(@_);
+    if ($formatted) {
+        $insns++;
+        $aname = "aa_$_[0]";
+        push @$aname, $formatted;
+    }
+    # The mnemonic is recorded even for "ignore" lines, so that it still
+    # gets an (empty) instrux_XXX table in insnsa.c.
+    $opcodes[$opcodes++] = $_[0], $done{$_[0]} = 1 if !$done{$_[0]};
+    if ($formatted && $formatted !~ /IF_ND/) {
+        push @big, $formatted;
+        foreach $i (&startbyte($_[2])) {
+            $aname = sprintf "dd_%02X",$i;
+            push @$aname, $#big;
+        }
+    }
+}
+
+close F;
+
+print STDERR "Writing insnsa.c...\n";
+
+# Fail loudly rather than silently producing no (or a truncated) file.
+open (A, ">insnsa.c") || die "unable to open insnsa.c: $!";
+
+print A "/* This file auto-generated from insns.dat by insns.pl" .
+        " - don't edit it */\n\n";
+print A "#include <stdio.h>\n";
+print A "#include \"nasm.h\"\n";
+print A "#include \"insns.h\"\n";
+print A "\n";
+
+# One {-1}-terminated template table per mnemonic ...
+foreach $i (@opcodes) {
+    print A "static struct itemplate instrux_${i}[] = {\n";
+    $aname = "aa_$i";
+    foreach $j (@$aname) {
+        print A "    $j\n";
+    }
+    print A "    {-1}\n};\n\n";
+}
+# ... and an index of those tables, in the same order as @opcodes.
+print A "struct itemplate *nasm_instructions[] = {\n";
+foreach $i (@opcodes) {
+    print A "    instrux_${i},\n";
+}
+print A "};\n";
+
+close (A) || die "error writing insnsa.c: $!";
+
+print STDERR "Writing insnsd.c...\n";
+
+open (D, ">insnsd.c") || die "unable to open insnsd.c: $!";
+
+print D "/* This file auto-generated from insns.dat by insns.pl" .
+        " - don't edit it */\n\n";
+print D "#include <stdio.h>\n";
+print D "#include \"nasm.h\"\n";
+print D "#include \"insns.h\"\n";
+print D "\n";
+
+# The flat table of all templates visible to the disassembler.
+print D "static struct itemplate instrux[] = {\n";
+foreach $j (@big) {
+    print D "    $j\n";
+}
+print D "    {-1}\n};\n\n";
+
+# For each possible first byte, a NULL-terminated list of pointers into
+# instrux[] for the templates that can begin with that byte.
+for ($c=0; $c<256; $c++) {
+    $h = sprintf "%02X", $c;
+    print D "static struct itemplate *itable_${h}[] = {\n";
+    $aname = "dd_$h";
+    foreach $j (@$aname) {
+        print D "    instrux + $j,\n";
+    }
+    print D "    NULL\n};\n\n";
+}
+
+print D "struct itemplate **itable[] = {\n";
+for ($c=0; $c<256; $c++) {
+    printf D "    itable_%02X,\n", $c;
+}
+print D "};\n";
+
+close (D) || die "error writing insnsd.c: $!";
+
+printf STDERR "Done: %d instructions\n", $insns;
+
+# Turn the four fields of one insns.dat line into a C initialiser for a
+# `struct itemplate'. Returns undef for lines whose operand field is
+# "ignore" (the mnemonic exists but has no template).
+sub format {
+    my ($opcode, $operands, $codes, $flags) = @_;
+    my $num = 3;
+
+    return undef if $operands eq "ignore";
+
+    # Rewrite the operand shorthand into the names of the C operand-type
+    # constants. The order of the substitutions matters.
+    for ($operands) {
+        s/:/|colon,/g;
+        s/mem(\d+)/mem|bits$1/g;
+        s/mem/memory/g;
+        s/memory_offs/mem_offs/g;
+        s/imm(\d+)/imm|bits$1/g;
+        s/imm/immediate/g;
+        s/rm(\d+)/regmem|bits$1/g;
+    }
+
+    # "void" means no operands at all; otherwise pad the list out to
+    # three entries, counting down from 3 for each one we had to add.
+    if ($operands eq 'void') {
+        $operands = '0,0,0';
+        $num = 0;
+    }
+    until ($operands =~ /,.*,/) {
+        $operands .= ',0';
+        $num--;
+    }
+    $operands =~ tr/a-z/A-Z/;
+
+    # Every comma-separated flag gets an IF_ prefix; they are OR'd together.
+    $flags = "IF_" . join("|IF_", split(/,/, $flags, -1));
+
+    return sprintf '{I_%s, %s, {%s}, "%s", %s},',
+                   $opcode, $num, $operands, $codes, $flags;
+}
+
+# Here we determine the range of possible starting bytes for a given
+# instruction. We need only consider the codes:
+# \1 \2 \3     mean literal bytes, of course
+# \4 \5 \6 \7  mean PUSH/POP of segment registers: special case
+# \10 \11 \12  mean byte plus register value
+# \17          means byte zero
+# \330         means byte plus condition code
+# \0 or \340   mean give up and return empty set
+#
+# Takes the code string of one insns.dat line and returns the list of
+# byte values the encoding may begin with (possibly empty). Any code
+# not listed above is skipped and the next one examined; the routine
+# dies if it runs out of codes without reaching a decision.
+sub startbyte {                        # FIXME we cheat, for now :-)
+    local ($codes) = @_;
+    # Parenthesised on purpose: `local $word, @range;' localises only
+    # $word. $start and $r are working variables too and must not leak.
+    local ($word, $start, $r, @range);
+
+    while (1) {
+        # peel the first \code off the front of the string
+        die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/;
+        $word = $1, $codes = $2;
+        # In the tests below, $1 is the hex byte captured from $codes.
+        return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/;
+        return (0x07, 0x17, 0x1F) if $word eq "\\4";
+        return (0xA1, 0xA9) if $word eq "\\5";
+        return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6";
+        return (0xA0, 0xA8) if $word eq "\\7";
+        $start=hex $1, $r=8, last if $word =~ /^\\1[012]$/ && $codes =~/^\\x(..)/;
+        return (0) if $word eq "\\17";
+        $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/;
+        return () if $word eq "\\0" || $word eq "\\340";
+    }
+    # base byte plus register (8 values) or condition code (16 values)
+    @range = ();
+    push @range, $start++ while ($r-- > 0);
+    @range;
+}
diff --git a/internal.doc b/internal.doc
new file mode 100644
index 0000000..f04152a
--- /dev/null
+++ b/internal.doc
@@ -0,0 +1,268 @@
+Internals of the Netwide Assembler
+==================================
+
+The Netwide Assembler is intended to be a modular, re-usable x86
+assembler, which can be embedded in other programs, for example as
+the back end to a compiler.
+
+The assembler is composed of modules. The interfaces between them
+look like:
+
+                  +---- parser.c ----+
+                  |         |        |
+                  |      float.c     |
+                  |                  |
+                  +--- assemble.c ---+
+                  |         |        |
+        nasm.c ---+     insnsa.c     +--- nasmlib.c
+                  |                  |
+                  +---- labels.c ----+
+                  |                  |
+                  +--- outform.c ----+
+                  |                  |
+                  +----- out*.c -----+
+
+In other words, each of `parser.c', `assemble.c', `labels.c',
+`outform.c' and each of the output format modules `out*.c'
+(`outbin.c', `outelf.c' and so on) are independent modules, which
+do not inter-communicate except through the main program.
+
+The Netwide *Disassembler* is not intended to be particularly
+portable or reusable or anything, however. So I won't bother
+documenting it here. :-)
+
+nasmlib.c
+---------
+
+This is a library module; it contains simple library routines which
+may be referenced by all other modules. Among these are a set of
+wrappers around the standard `malloc' routines, which will report a
+fatal error if they run out of memory, rather than returning NULL.
+
+parser.c
+--------
+
+This contains a source-line parser. It parses `canonical' assembly
+source lines, containing some combination of the `label', `opcode',
+`operand' and `comment' fields: it does not process directives or
+macros. It exports two functions: `parse_line' and `cleanup_insn'.
+
+`parse_line' is the main parser function: you pass it a source line
+in ASCII text form, and it returns you an `insn' structure
+containing all the details of the instruction on that line. The
+parameters it requires are:
+
+- The location (segment, offset) where the instruction on this line
+ will eventually be placed. This is necessary in order to evaluate
+ expressions containing the Here token, `$'.
+
+- A function which can be called to retrieve the value of any
+ symbols the source line references.
+
+- Which pass the assembler is on: an undefined symbol only causes an
+ error condition on pass two.
+
+- The source line to be parsed.
+
+- A structure to fill with the results of the parse.
+
+- A function which can be called to report errors.
+
+Some instructions (DB, DW, DD for example) can require an arbitrary
+amount of storage, and so some of the members of the resulting
+`insn' structure will be dynamically allocated. The other function
+exported by `parser.c' is `cleanup_insn', which can be called to
+deallocate any dynamic storage associated with the results of a
+parse.
+
+names.c
+-------
+
+This doesn't count as a module - it defines a few arrays which are
+shared between NASM and NDISASM, so it's a separate file which is
+#included by both parser.c and disasm.c.
+
+float.c
+-------
+
+This is essentially a library module: it exports one function,
+`float_const', which converts an ASCII representation of a
+floating-point number into an x86-compatible binary representation,
+without using any built-in floating-point arithmetic (so it will run
+on any platform, portably). It calls nothing, and is called only by
+`parser.c'. Note that the function `float_const' must be passed an
+error reporting routine.
+
+assemble.c
+----------
+
+This module contains the code generator: it translates `insn'
+structures as returned from the parser module into actual generated
+code which can be placed in an output file. It exports two
+functions, `assemble' and `insn_size'.
+
+`insn_size' is designed to be called on pass one of assembly: it
+takes an `insn' structure as input, and returns the amount of space
+that would be taken up if the instruction described in the structure
+were to be converted to real machine code. `insn_size' also needs
+to be told the location (as a segment/offset pair) where the
+instruction would be assembled, the mode of assembly (16/32 bit
+default), and a function it can call to report errors.
+
+`assemble' is designed to be called on pass two: it takes all the
+parameters that `insn_size' does, but has an extra parameter which
+is an output driver. `assemble' actually converts the input
+instruction into machine code, and outputs the machine code by means
+of calling the `output' function of the driver.
+
+insnsa.c
+--------
+
+This is another library module: it exports one very big array of
+instruction translations. It has to be a separate module so that DOS
+compilers, with less memory to spare than typical Unix ones, can
+cope with it.
+
+labels.c
+--------
+
+This module contains a label manager. It exports six functions:
+
+`init_labels' should be called before any other function in the
+module. `cleanup_labels' may be called after all other use of the
+module has finished, to deallocate storage.
+
+`define_label' is called to define new labels: you pass it the name
+of the label to be defined, and the (segment,offset) pair giving the
+value of the label. It is also passed an error-reporting function,
+and an output driver structure (so that it can call the output
+driver's label-definition function). `define_label' mentally
+prepends the name of the most recently defined non-local label to
+any label beginning with a period.
+
+`define_label_stub' is designed to be called in pass two, once all
+the labels have already been defined: it does nothing except to
+update the "most-recently-defined-non-local-label" status, so that
+references to local labels in pass two will work correctly.
+
+`declare_as_global' is used to declare that a label should be
+global. It must be called _before_ the label in question is defined.
+
+Finally, `lookup_label' attempts to translate a label name into a
+(segment,offset) pair. It returns non-zero on success.
+
+The label manager module is (theoretically :) restartable: after
+calling `cleanup_labels', you can call `init_labels' again, and
+start a new assembly with a new set of symbols.
+
+outform.c
+---------
+
+This small module contains a set of routines to manage a list of
+output formats, and select one given a keyword. It contains three
+small routines: `ofmt_register' which registers an output driver as
+part of the managed list, `ofmt_list' which lists the available
+drivers on stdout, and `ofmt_find' which tries to find the driver
+corresponding to a given name.
+
+The output modules
+------------------
+
+Each of the output modules, `outbin.o', `outelf.o' and so on,
+exports only one symbol, which is an output driver data structure
+containing pointers to all the functions needed to produce output
+files of the appropriate type.
+
+The exception to this is `outcoff.o', which exports _two_ output
+driver structures, since COFF and Win32 object file formats are very
+similar and most of the code is shared between them.
+
+nasm.c
+------
+
+This is the main program: it calls all the functions in the above
+modules, and puts them together to form a working assembler. We
+hope. :-)
+
+Segment Mechanism
+-----------------
+
+In NASM, the term `segment' is used to separate the different
+sections/segments/groups of which an object file is composed.
+Essentially, every address NASM is capable of understanding is
+expressed as an offset from the beginning of some segment.
+
+The defining property of a segment is that if two symbols are
+declared in the same segment, then the distance between them is
+fixed at assembly time. Hence every externally-declared variable
+must be declared in its own segment, since none of the locations of
+these are known, and so no distances may be computed at assembly
+time.
+
+The special segment value NO_SEG (-1) is used to denote an absolute
+value, e.g. a constant whose value does not depend on relocation,
+such as the _size_ of a data object.
+
+Apart from NO_SEG, segment indices all have their least significant
+bit clear, if they refer to actual in-memory segments. For each
+segment of this type, there is an auxiliary segment value, defined
+to be the same number but with the LSB set, which denotes the
+segment-base value of that segment, for object formats which support
+it (Microsoft .OBJ, for example).
+
+Hence, if `textsym' is declared in a code segment with index 2, then
+referencing `SEG textsym' would return zero offset from
+segment-index 3. Or, in object formats which don't understand such
+references, it would return an error instead.
+
+The next twist is SEG_ABS. Some symbols may be declared with a
+segment value of SEG_ABS plus a 16-bit constant: this indicates that
+they are far-absolute symbols, such as the BIOS keyboard buffer
+under MS-DOS, which always resides at 0040h:001Eh. Far-absolutes are
+handled with care in the parser, since they are supposed to evaluate
+simply to their offset part within expressions, but applying SEG to
+one should yield its segment part. A far-absolute should never find
+its way _out_ of the parser, unless it is enclosed in a WRT clause,
+in which case Microsoft 16-bit object formats will want to know
+about it.
+
+Porting Issues
+--------------
+
+We have tried to write NASM in portable ANSI C: we do not assume
+little-endianness or any hardware characteristics (in order that
+NASM should work as a cross-assembler for x86 platforms, even when
+run on other, stranger machines).
+
+Assumptions we _have_ made are:
+
+- We assume that `short' is at least 16 bits, and `long' at least
+ 32. This really _shouldn't_ be a problem, since Kernighan and
+ Ritchie tell us we are entitled to do so.
+
+- We rely on having more than 6 characters of significance on
+ externally linked symbols in the NASM sources. This may get fixed
+ at some point. We haven't yet come across a linker brain-dead
+ enough to get it wrong anyway.
+
+- We assume that `fopen' using the mode "wb" can be used to write
+ binary data files. This may be wrong on systems like VMS, with a
+ strange file system. Though why you'd want to run NASM on VMS is
+ beyond me anyway.
+
+That's it. Subject to those caveats, NASM should be completely
+portable. If not, we _really_ want to know about it.
+
+Porting Non-Issues
+------------------
+
+The following is _not_ a portability problem, although it looks like
+one.
+
+- When compiling with some versions of DJGPP, you may get
+ diagnostics such as `warning: ANSI C forbids braced-groups within
+ expressions'. This isn't NASM's fault - the problem seems to be
+ that DJGPP's definitions of the <ctype.h> macros include a
+ GNU-specific C extension. So when compiling using -ansi and
+ -pedantic, DJGPP complains about its own header files. It isn't a
+ problem anyway, since it still generates correct code.
diff --git a/labels.c b/labels.c
new file mode 100644
index 0000000..ff1d571
--- /dev/null
+++ b/labels.c
@@ -0,0 +1,292 @@
+/* labels.c label handling for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "nasm.h"
+#include "nasmlib.h"
+
+/*
+ * A local label is one that begins with exactly one period. Things
+ * that begin with _two_ periods are NASM-specific things.
+ */
+#define islocal(l) ((l)[0] == '.' && (l)[1] != '.')
+
+#define LABEL_BLOCK 320 /* no. of labels/block */
+#define LBLK_SIZE (LABEL_BLOCK*sizeof(union label))
+#define LABEL_HASHES 32 /* no. of hash table entries */
+
+#define END_LIST -3 /* don't clash with NO_SEG! */
+#define END_BLOCK -2
+#define BOGUS_VALUE -4
+
+#define PERMTS_SIZE 4096 /* size of text blocks */
+
+/* values for label.defn.is_global */
+#define NOT_DEFINED_YET 0
+#define LOCAL_SYMBOL 1
+#define GLOBAL_SYMBOL 2
+#define GLOBAL_PLACEHOLDER 3
+
+/*
+ * One slot in a block of labels. A slot holds either a real label
+ * (`defn') or a bookkeeping marker (`admin'). `admin.movingon' shares
+ * storage with `defn.segment', which is why the END_LIST / END_BLOCK
+ * marker values are chosen not to clash with segment numbers.
+ */
+union label { /* actual label structures */
+ struct {
+ long segment, offset;
+ char *label;
+ int is_global;
+ } defn;
+ struct {
+ long movingon, dummy;
+ union label *next;
+ } admin;
+};
+
+struct permts { /* permanent text storage */
+ struct permts *next; /* for the linked list */
+ int size, usage; /* size and used space in ... */
+ char data[PERMTS_SIZE]; /* ... the data block itself */
+};
+
+static union label *ltab[LABEL_HASHES];/* using a hash table */
+static union label *lfree[LABEL_HASHES];/* pointer into the above */
+static struct permts *perm_head; /* start of perm. text storage */
+static struct permts *perm_tail; /* end of perm. text storage */
+
+static void init_block (union label *blk);
+static char *perm_copy (char *string1, char *string2);
+
+static char *prevlabel;
+
+/*
+ * Internal routine: finds the `union label' corresponding to the
+ * given label name. Creates a new one, if it isn't found, and if
+ * `create' is TRUE.
+ *
+ * A local label (see islocal) is looked up under the name formed by
+ * prefixing it with the most recent non-local label, `prevlabel'.
+ * Returns NULL only when the label is absent and `create' is FALSE.
+ */
+static union label *find_label (char *label, int create) {
+    unsigned int hash = 0;
+    char *p, *prev;
+    int prevlen;
+    union label *lptr;
+
+    if (islocal(label))
+        prev = prevlabel;
+    else
+        prev = "";
+    prevlen = strlen(prev);
+
+    /*
+     * Sum the characters as unsigned values: plain `char' may be
+     * signed, and a negative sum would give a negative table index.
+     */
+    p = prev;
+    while (*p) hash += (unsigned char) *p++;
+    p = label;
+    while (*p) hash += (unsigned char) *p++;
+    hash %= LABEL_HASHES;
+
+    lptr = ltab[hash];
+    while (lptr->admin.movingon != END_LIST) {
+        if (lptr->admin.movingon == END_BLOCK) {
+            lptr = lptr->admin.next;
+            /*
+             * The last block in the chain may be exactly full, in which
+             * case its END_BLOCK marker has no successor yet.
+             */
+            if (!lptr)
+                break;
+            continue;                  /* re-test the first slot's marker */
+        }
+        if (!strncmp(lptr->defn.label, prev, prevlen) &&
+            !strcmp(lptr->defn.label+prevlen, label))
+            return lptr;
+        lptr++;
+    }
+    if (create) {
+        if (lfree[hash]->admin.movingon == END_BLOCK) {
+            /*
+             * must allocate a new block
+             */
+            lfree[hash]->admin.next = (union label *) nasm_malloc (LBLK_SIZE);
+            lfree[hash] = lfree[hash]->admin.next;
+            init_block(lfree[hash]);
+        }
+
+        /* overwrite the END_LIST marker so the slot counts as in use */
+        lfree[hash]->admin.movingon = BOGUS_VALUE;
+        lfree[hash]->defn.label = perm_copy (prev, label);
+        lfree[hash]->defn.is_global = NOT_DEFINED_YET;
+        return lfree[hash]++;
+    } else
+        return NULL;
+}
+
+/*
+ * Look up `label' without creating it. If it has been defined, as
+ * either a local or a global symbol, store its location through
+ * `segment' and `offset' and return 1. Otherwise return 0 and leave
+ * both outputs untouched.
+ */
+int lookup_label (char *label, long *segment, long *offset) {
+    union label *found = find_label (label, 0);
+
+    if (!found)
+        return 0;
+    if (found->defn.is_global != LOCAL_SYMBOL &&
+        found->defn.is_global != GLOBAL_SYMBOL)
+        return 0;                      /* declared but not yet defined */
+
+    *segment = found->defn.segment;
+    *offset = found->defn.offset;
+    return 1;
+}
+
+/*
+ * Note that `label' has been seen, without giving it a value: a
+ * non-local label becomes the current `prevlabel', so that local
+ * labels which follow attach to it. Local labels are ignored.
+ */
+void define_label_stub (char *label, efunc error) {
+    union label *entry;
+
+    if (islocal(label))
+        return;
+
+    entry = find_label (label, 1);
+    if (entry == NULL)
+        error (ERR_PANIC, "can't find label `%s' on pass two", label);
+    prevlabel = entry->defn.label;
+}
+
+/*
+ * Define `label' at (segment, offset) and pass the definition on to
+ * the output format via ofmt->symdef. A label previously declared
+ * global is reported as global; a second definition of the same label
+ * is reported as an error and otherwise ignored.
+ *
+ * A label not starting with a period becomes the new `prevlabel'.
+ * A local label (exactly one leading period) needs a preceding
+ * non-local label to attach to. Names starting with two periods are
+ * NASM-specific: they neither change `prevlabel' nor require one.
+ */
+void define_label (char *label, long segment, long offset,
+                   struct ofmt *ofmt, efunc error) {
+    union label *lptr;
+
+    lptr = find_label (label, 1);
+    switch (lptr->defn.is_global) {
+      case NOT_DEFINED_YET:
+        lptr->defn.is_global = LOCAL_SYMBOL;
+        break;
+      case GLOBAL_PLACEHOLDER:
+        lptr->defn.is_global = GLOBAL_SYMBOL;
+        break;
+      default:
+        error(ERR_NONFATAL, "symbol `%s' redefined", label);
+        return;
+    }
+
+    if (label[0] != '.')               /* not local, but not special either */
+        prevlabel = lptr->defn.label;
+    else if (islocal(label) && !*prevlabel)
+        error(ERR_NONFATAL, "attempt to define a local label before any"
+              " non-local labels");
+
+    lptr->defn.segment = segment;
+    lptr->defn.offset = offset;
+
+    ofmt->symdef (lptr->defn.label, segment, offset,
+                  lptr->defn.is_global == GLOBAL_SYMBOL);
+}
+
+/*
+ * Define `label' as a common variable of `size' bytes in `segment'.
+ * The stored offset is zero; the size is what gets passed to
+ * ofmt->symdef, with 2 as its final argument. Redefinition is an
+ * error. A name beginning with a period is reported as an error but
+ * is still recorded and passed to the output format.
+ */
+void define_common (char *label, long segment, long size,
+                    struct ofmt *ofmt, efunc error) {
+    union label *entry = find_label (label, 1);
+    int state = entry->defn.is_global;
+
+    if (state == NOT_DEFINED_YET)
+        entry->defn.is_global = LOCAL_SYMBOL;
+    else if (state == GLOBAL_PLACEHOLDER)
+        entry->defn.is_global = GLOBAL_SYMBOL;
+    else {
+        error(ERR_NONFATAL, "symbol `%s' redefined", label);
+        return;
+    }
+
+    if (label[0] == '.')
+        error(ERR_NONFATAL, "attempt to define a local label as a "
+              "common variable");
+    else                               /* not local, but not special either */
+        prevlabel = entry->defn.label;
+
+    entry->defn.segment = segment;
+    entry->defn.offset = 0;
+
+    ofmt->symdef (entry->defn.label, segment, size, 2);
+}
+
+/*
+ * Handle a [GLOBAL] declaration for `label'. The declaration must
+ * come before the label's definition, and local labels cannot be made
+ * global. Repeating the declaration is silently accepted.
+ */
+void declare_as_global (char *label, efunc error) {
+    union label *entry;
+    int state;
+
+    if (islocal(label)) {
+        error(ERR_NONFATAL, "attempt to declare local symbol `%s' as"
+              " global", label);
+        return;
+    }
+
+    entry = find_label (label, 1);
+    state = entry->defn.is_global;
+
+    if (state == NOT_DEFINED_YET)
+        entry->defn.is_global = GLOBAL_PLACEHOLDER;
+    else if (state == LOCAL_SYMBOL)
+        error(ERR_NONFATAL, "symbol `%s': [GLOBAL] directive must"
+              " appear before symbol definition", label);
+    /* GLOBAL_PLACEHOLDER or GLOBAL_SYMBOL: already done, nothing to do */
+}
+
+/*
+ * Set up the label manager: one empty block per hash chain, one empty
+ * block of permanent text storage, and an empty `prevlabel'.
+ * Returns 0 on success, -1 if an allocation returned NULL.
+ */
+int init_labels (void) {
+    int bucket;
+    struct permts *first;
+
+    for (bucket = 0; bucket < LABEL_HASHES; bucket++) {
+        union label *blk = (union label *) nasm_malloc (LBLK_SIZE);
+
+        ltab[bucket] = blk;
+        if (blk == NULL)
+            return -1;                 /* can't initialise, panic */
+        init_block (blk);
+        lfree[bucket] = blk;           /* first free slot is the first slot */
+    }
+
+    first = (struct permts *) nasm_malloc (sizeof(struct permts));
+    perm_head = perm_tail = first;
+    if (first == NULL)
+        return -1;
+
+    first->next = NULL;
+    first->size = PERMTS_SIZE;
+    first->usage = 0;
+
+    prevlabel = "";
+
+    return 0;
+}
+
+/*
+ * Free every label block on every hash chain, then every block of
+ * permanent text storage.
+ */
+void cleanup_labels (void) {
+    int bucket;
+
+    for (bucket = 0; bucket < LABEL_HASHES; bucket++) {
+        union label *blk = ltab[bucket];
+
+        while (blk != NULL) {
+            union label *scan = blk;
+            union label *following;
+
+            /* the link to the next block lives in the END_BLOCK slot */
+            while (scan->admin.movingon != END_BLOCK)
+                scan++;
+            following = scan->admin.next;
+            nasm_free (blk);
+            blk = following;
+        }
+    }
+
+    while (perm_head) {
+        perm_tail = perm_head;
+        perm_head = perm_head->next;
+        nasm_free (perm_tail);
+    }
+}
+
+/*
+ * Mark a freshly allocated block as empty: every slot but the last
+ * reads END_LIST, and the last is an END_BLOCK marker with no
+ * successor block.
+ */
+static void init_block (union label *blk) {
+    union label *slot = blk;
+    union label *last = blk + (LABEL_BLOCK - 1);
+
+    while (slot < last) {
+        slot->admin.movingon = END_LIST;
+        slot++;
+    }
+    last->admin.movingon = END_BLOCK;
+    last->admin.next = NULL;
+}
+
+/*
+ * Concatenate string1 and string2 into permanent text storage and
+ * return a pointer to the NUL-terminated result. A new storage block
+ * is appended to the list when the current one has too little room.
+ *
+ * NOTE(review): a combined length above PERMTS_SIZE would overrun the
+ * block's data array. This assumes label names are always shorter
+ * than that - confirm against the parser's line-length limit.
+ */
+static char *perm_copy (char *string1, char *string2) {
+    char *p, *q;
+    int len = strlen(string1)+strlen(string2)+1;
+
+    if (perm_tail->size - perm_tail->usage < len) {
+        perm_tail->next = (struct permts *)nasm_malloc(sizeof(struct permts));
+        perm_tail = perm_tail->next;
+        /* terminate the list: cleanup_labels follows `next' until NULL */
+        perm_tail->next = NULL;
+        perm_tail->size = PERMTS_SIZE;
+        perm_tail->usage = 0;
+    }
+    p = q = perm_tail->data + perm_tail->usage;
+    while ( (*q = *string1++) ) q++;   /* copy string1, leaving q on its NUL */
+    while ( (*q++ = *string2++) );     /* append string2 including its NUL */
+    perm_tail->usage = q - perm_tail->data;
+
+    return p;
+}
diff --git a/labels.h b/labels.h
new file mode 100644
index 0000000..fb466ca
--- /dev/null
+++ b/labels.h
@@ -0,0 +1,17 @@
+/* labels.h header file for labels.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+int lookup_label (char *label, long *segment, long *offset);
+void define_label (char *label, long segment, long offset,
+ struct ofmt *ofmt, efunc error);
+void define_common (char *label, long segment, long size,
+ struct ofmt *ofmt, efunc error);
+void define_label_stub (char *label, efunc error);
+void declare_as_global (char *label, efunc error);
+int init_labels (void);
+void cleanup_labels (void);
diff --git a/lcc/Readme b/lcc/Readme
new file mode 100644
index 0000000..d37f812
--- /dev/null
+++ b/lcc/Readme
@@ -0,0 +1,57 @@
+This directory contains the necessary files to port the C compiler
+``LCC'' (available by FTP from sunsite.doc.ic.ac.uk in the directory
+/computing/programming/languages/c/lcc) to compile for Linux (a.out
+or ELF) by using NASM as a back-end code generator.
+
+This patch has been tested on lcc version 3.6.
+
+To install:
+
+- Copy `x86nasm.md' into the `src' directory of the lcc tree.
+
+- Copy either `lin-elf.c' or `lin-aout.c' into the `etc' directory.
+
+- If you're installing for a.out, edit `x86nasm.md' and change the
+ conditional after the comment reading "CHANGE THIS FOR a.out" in
+ the `defsymbol' function from `#if 0' to `#if 1'.
+
+- Make the following changes to `bind.c' in the `src' directory:
+
+ - Near the top of the file, add a line that reads
+ extern Interface x86nasmIR;
+
+ - In the `bindings' array, add the lines
+ "x86-nasm", &x86nasmIR,
+ "x86/nasm", &x86nasmIR,
+ (in sensible looking places...)
+
+ A sample `bind.c' has been provided to show what the result of
+ this might look like. You might be able to get away with using it
+ directly...
+
+- Modify the lcc makefile to include rules for x86nasm.o: this will
+ have to be done in about three places. Just copy any line with
+ `x86' on it and modify it to read `x86nasm' everywhere. (Except
+ that in the list of object files that rcc is made up from, do
+ remember to ensure that every line but the last has a trailing
+ backslash...)
+
+- You may have to modify the contents of `lin-elf.c' or `lin-aout.c'
+ to reflect the true locations of files such as crt0.o, crt1.o,
+ ld-linux.so and so forth. If you don't know where to find these,
+ compile a short C program with `gcc -v' and see what command line
+ gcc feeds to `ld'.
+
+- You should now be able to build lcc, using `lin-elf.c' or
+ `lin-aout.c' as the system-dependent part of the `lcc' wrapper
+ program.
+
+- Symlink x86nasm.c into the `src' directory before attempting the
+ triple test, or the compile will fail.
+
+- Now it should pass the triple test, on either ELF or a.out. Voila!
+
+Known potential problems:
+
+- The machine description may occasionally generate `db' lines that
+ are longer than NASM's 1024-character maximum.
diff --git a/lcc/bind.c b/lcc/bind.c
new file mode 100644
index 0000000..b0c1f51
--- /dev/null
+++ b/lcc/bind.c
@@ -0,0 +1,23 @@
+#include "c.h"
+extern Interface nullIR, symbolicIR;
+extern Interface mipsebIR, mipselIR;
+extern Interface sparcIR, solarisIR;
+extern Interface x86IR, x86nasmIR;
+Binding bindings[] = {
+ "symbolic", &symbolicIR,
+ "mips-irix", &mipsebIR,
+ "mips-ultrix", &mipselIR,
+ "sparc-sun", &sparcIR,
+ "sparc-solaris", &solarisIR,
+ "x86-dos", &x86IR,
+ "x86-nasm", &x86nasmIR,
+ "symbolic/irix", &symbolicIR, /* omit */
+ "mips/irix", &mipsebIR, /* omit */
+ "mips/ultrix", &mipselIR, /* omit */
+ "sparc/sun", &sparcIR, /* omit */
+ "sparc/solaris", &solarisIR, /* omit */
+ "x86/dos", &x86IR, /* omit */
+ "x86/nasm", &x86nasmIR, /* omit */
+ "null", &nullIR,
+ NULL, NULL
+};
diff --git a/lcc/lin-aout.c b/lcc/lin-aout.c
new file mode 100644
index 0000000..f1ac88a
--- /dev/null
+++ b/lcc/lin-aout.c
@@ -0,0 +1,44 @@
+/* x86 running linux and using nasm as a.out */
+
+#include <string.h>
+
+#ifndef LCCDIR
+#define LCCDIR "/usr/local/lib/lcc/"
+#endif
+
+#define NASMPATH "/usr/local/bin/nasm"
+
+char *cpp[] = { LCCDIR "cpp", "-D__STDC__=1",
+ "-Di386", "-D__i386", "-D__i386__",
+ "-Dlinux", "-D__linux", "-D__linux__",
+ "-Dunix", "-D__unix", "-D__unix__",
+ "$1", "$2", "$3", 0 };
+char *include[] = { "-I" LCCDIR "include", "-I/usr/local/include",
+ "-I/usr/include", 0 };
+char *com[] = { LCCDIR "rcc", "-target=x86/nasm",
+ "$1", "$2", "$3", 0 };
+char *as[] = { NASMPATH, "-faout", "-o", "$3", "$1", "$2", 0 };
+char *ld[] = { "/usr/bin/ld", "-m", "i386linux",
+ "-L/usr/i486-linuxaout/lib",
+ "-o", "$3", "$1",
+ "/usr/i486-linuxaout/lib/crt0.o",
+ "$2", "", "-lc", 0 };
+static char *bbexit = LCCDIR "bbexit.o";
+
+extern char *concat(char *, char *);
+extern int access(const char *, int);
+
+/*
+ * Handle one driver option specific to this host. Returns 1 if `arg'
+ * was recognised, 0 otherwise.
+ *   -lccdir=DIR  take cpp, the include directory, rcc and bbexit.o
+ *                from DIR
+ *   -g           accepted and ignored
+ *   -b           if bbexit.o is readable, place it in the empty slot
+ *                ld[9], which follows "$2" in the linker command
+ */
+int option(char *arg) {
+    if (strncmp(arg, "-lccdir=", 8) == 0) {
+        char *dir = &arg[8];
+
+        cpp[0] = concat(dir, "/cpp");
+        include[0] = concat("-I", concat(dir, "/include"));
+        com[0] = concat(dir, "/rcc");
+        bbexit = concat(dir, "/bbexit.o");
+        return 1;
+    }
+    if (strcmp(arg, "-g") == 0)
+        return 1;
+    if (strcmp(arg, "-b") == 0 && access(bbexit, 4) == 0) {
+        ld[9] = bbexit;
+        return 1;
+    }
+    return 0;
+}
diff --git a/lcc/lin-elf.c b/lcc/lin-elf.c
new file mode 100644
index 0000000..15df9e5
--- /dev/null
+++ b/lcc/lin-elf.c
@@ -0,0 +1,45 @@
+/* x86 running linux and using nasm as ELF */
+
+#include <string.h>
+
+#ifndef LCCDIR
+#define LCCDIR "/usr/local/lib/lcc/"
+#endif
+
+#define NASMPATH "/usr/local/bin/nasm"
+
+char *cpp[] = { LCCDIR "cpp", "-D__STDC__=1",
+ "-D__ELF__", "-Di386", "-D__i386", "-D__i386__",
+ "-Dlinux", "-D__linux", "-D__linux__",
+ "$1", "$2", "$3", 0 };
+char *include[] = { "-I" LCCDIR "include", "-I/usr/local/include",
+ "-I/usr/include", 0 };
+char *com[] = { LCCDIR "rcc", "-target=x86/nasm",
+ "$1", "$2", "$3", 0 };
+char *as[] = { NASMPATH, "-felf", "-o", "$3", "$1", "$2", 0 };
+char *ld[] = { "/usr/bin/ld", "-m", "elf_i386",
+ "-dynamic-linker", "/lib/ld-linux.so.1",
+ "-L/usr/i486-linux/lib",
+ "-o", "$3", "$1",
+ "/usr/lib/crt1.o", "/usr/lib/crti.o", "/usr/lib/crtbegin.o",
+ "$2", "",
+ "-lc", "", "/usr/lib/crtend.o", "/usr/lib/crtn.o", 0 };
+static char *bbexit = LCCDIR "bbexit.o";
+
+extern char *concat(char *, char *);
+extern int access(const char *, int);
+
+/*
+ * Handle one driver option specific to this host. Returns 1 if `arg'
+ * was recognised, 0 otherwise.
+ *   -lccdir=DIR  take cpp, the include directory, rcc and bbexit.o
+ *                from DIR
+ *   -g           accepted and ignored
+ *   -b           if bbexit.o is readable, place it in the empty slot
+ *                ld[13], which follows "$2" in the linker command
+ */
+int option(char *arg) {
+    if (strncmp(arg, "-lccdir=", 8) == 0) {
+        char *dir = &arg[8];
+
+        cpp[0] = concat(dir, "/cpp");
+        include[0] = concat("-I", concat(dir, "/include"));
+        com[0] = concat(dir, "/rcc");
+        bbexit = concat(dir, "/bbexit.o");
+        return 1;
+    }
+    if (strcmp(arg, "-g") == 0)
+        return 1;
+    if (strcmp(arg, "-b") == 0 && access(bbexit, 4) == 0) {
+        ld[13] = bbexit;
+        return 1;
+    }
+    return 0;
+}
diff --git a/lcc/x86nasm.md b/lcc/x86nasm.md
new file mode 100644
index 0000000..d709122
--- /dev/null
+++ b/lcc/x86nasm.md
@@ -0,0 +1,703 @@
+%{
+enum { EAX=0, ECX=1, EDX=2, EBX=3, ESI=6, EDI=7 };
+#include "c.h"
+#define NODEPTR_TYPE Node
+#define OP_LABEL(p) ((p)->op)
+#define LEFT_CHILD(p) ((p)->kids[0])
+#define RIGHT_CHILD(p) ((p)->kids[1])
+#define STATE_LABEL(p) ((p)->x.state)
+static void address ARGS((Symbol, Symbol, int));
+static void blkfetch ARGS((int, int, int, int));
+static void blkloop ARGS((int, int, int, int, int, int[]));
+static void blkstore ARGS((int, int, int, int));
+static void defaddress ARGS((Symbol));
+static void defconst ARGS((int, Value));
+static void defstring ARGS((int, char *));
+static void defsymbol ARGS((Symbol));
+static void doarg ARGS((Node));
+static void emit2 ARGS((Node));
+static void export ARGS((Symbol));
+static void clobber ARGS((Node));
+static void function ARGS((Symbol, Symbol [], Symbol [], int));
+static void global ARGS((Symbol));
+static void import ARGS((Symbol));
+static void local ARGS((Symbol));
+static void progbeg ARGS((int, char **));
+static void progend ARGS((void));
+static void segment ARGS((int));
+static void space ARGS((int));
+static void target ARGS((Node));
+static int ckstack ARGS((Node, int));
+static int memop ARGS((Node));
+static int sametree ARGS((Node, Node));
+static Symbol charreg[32], shortreg[32], intreg[32];
+static Symbol fltreg[32];
+
+static int cseg;
+
+static Symbol quo, rem;
+
+%}
+%start stmt
+%term ADDD=306 ADDF=305 ADDI=309 ADDP=311 ADDU=310
+%term ADDRFP=279
+%term ADDRGP=263
+%term ADDRLP=295
+%term ARGB=41 ARGD=34 ARGF=33 ARGI=37 ARGP=39
+%term ASGNB=57 ASGNC=51 ASGND=50 ASGNF=49 ASGNI=53 ASGNP=55 ASGNS=52
+%term BANDU=390
+%term BCOMU=406
+%term BORU=422
+%term BXORU=438
+%term CALLB=217 CALLD=210 CALLF=209 CALLI=213 CALLV=216
+%term CNSTC=19 CNSTD=18 CNSTF=17 CNSTI=21 CNSTP=23 CNSTS=20 CNSTU=22
+%term CVCI=85 CVCU=86
+%term CVDF=97 CVDI=101
+%term CVFD=114
+%term CVIC=131 CVID=130 CVIS=132 CVIU=134
+%term CVPU=150
+%term CVSI=165 CVSU=166
+%term CVUC=179 CVUI=181 CVUP=183 CVUS=180
+%term DIVD=450 DIVF=449 DIVI=453 DIVU=454
+%term EQD=482 EQF=481 EQI=485
+%term GED=498 GEF=497 GEI=501 GEU=502
+%term GTD=514 GTF=513 GTI=517 GTU=518
+%term INDIRB=73 INDIRC=67 INDIRD=66 INDIRF=65 INDIRI=69 INDIRP=71 INDIRS=68
+%term JUMPV=584
+%term LABELV=600
+%term LED=530 LEF=529 LEI=533 LEU=534
+%term LOADB=233 LOADC=227 LOADD=226 LOADF=225 LOADI=229 LOADP=231 LOADS=228 LOADU=230
+%term LSHI=341 LSHU=342
+%term LTD=546 LTF=545 LTI=549 LTU=550
+%term MODI=357 MODU=358
+%term MULD=466 MULF=465 MULI=469 MULU=470
+%term NED=562 NEF=561 NEI=565
+%term NEGD=194 NEGF=193 NEGI=197
+%term RETD=242 RETF=241 RETI=245
+%term RSHI=373 RSHU=374
+%term SUBD=322 SUBF=321 SUBI=325 SUBP=327 SUBU=326
+%term VREGP=615
+%%
+reg: INDIRC(VREGP) "# read register\n"
+reg: INDIRD(VREGP) "# read register\n"
+reg: INDIRF(VREGP) "# read register\n"
+reg: INDIRI(VREGP) "# read register\n"
+reg: INDIRP(VREGP) "# read register\n"
+reg: INDIRS(VREGP) "# read register\n"
+stmt: ASGNC(VREGP,reg) "# write register\n"
+stmt: ASGND(VREGP,reg) "# write register\n"
+stmt: ASGNF(VREGP,reg) "# write register\n"
+stmt: ASGNI(VREGP,reg) "# write register\n"
+stmt: ASGNP(VREGP,reg) "# write register\n"
+stmt: ASGNS(VREGP,reg) "# write register\n"
+con: CNSTC "%a"
+con: CNSTI "%a"
+con: CNSTP "%a"
+con: CNSTS "%a"
+con: CNSTU "%a"
+stmt: reg ""
+reg: CVIU(reg) "%0" notarget(a)
+reg: CVPU(reg) "%0" notarget(a)
+reg: CVUI(reg) "%0" notarget(a)
+reg: CVUP(reg) "%0" notarget(a)
+acon: ADDRGP "%a"
+acon: con "%0"
+base: ADDRGP "%a"
+base: reg "%0"
+base: ADDI(reg,acon) "%0 + (%1)"
+base: ADDP(reg,acon) "%0 + (%1)"
+base: ADDU(reg,acon) "%0 + (%1)"
+base: ADDRFP "ebp + %a"
+base: ADDRLP "ebp + %a"
+index: reg "%0"
+index: LSHI(reg,con1) "%0*2"
+index: LSHI(reg,con2) "%0*4"
+index: LSHI(reg,con3) "%0*8"
+
+con1: CNSTI "1" range(a, 1, 1)
+con1: CNSTU "1" range(a, 1, 1)
+con2: CNSTI "2" range(a, 2, 2)
+con2: CNSTU "2" range(a, 2, 2)
+con3: CNSTI "3" range(a, 3, 3)
+con3: CNSTU "3" range(a, 3, 3)
+index: LSHU(reg,con1) "%0*2"
+index: LSHU(reg,con2) "%0*4"
+index: LSHU(reg,con3) "%0*8"
+addr: base "[%0]"
+addr: ADDI(index,base) "[%1 + %0]"
+addr: ADDP(index,base) "[%1 + %0]"
+addr: ADDU(index,base) "[%1 + %0]"
+addr: index "[%0]"
+mem: INDIRC(addr) "byte %0"
+mem: INDIRI(addr) "dword %0"
+mem: INDIRP(addr) "dword %0"
+mem: INDIRS(addr) "word %0"
+rc: reg "%0"
+rc: con "%0"
+
+mr: reg "%0"
+mr: mem "%0"
+
+mrc0: mem "%0"
+mrc0: rc "%0"
+mrc1: mem "%0" 1
+mrc1: rc "%0"
+
+mrc3: mem "%0" 3
+mrc3: rc "%0"
+reg: addr "lea %c,%0\n" 1
+reg: mrc0 "mov %c,%0\n" 1
+reg: LOADC(reg) "mov %c,%0\n" move(a)
+reg: LOADI(reg) "mov %c,%0\n" move(a)
+reg: LOADP(reg) "mov %c,%0\n" move(a)
+reg: LOADS(reg) "mov %c,%0\n" move(a)
+reg: LOADU(reg) "mov %c,%0\n" move(a)
+reg: ADDI(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1
+reg: ADDP(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1
+reg: ADDU(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1
+reg: SUBI(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1
+reg: SUBP(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1
+reg: SUBU(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1
+reg: BANDU(reg,mrc1) "?mov %c,%0\nand %c,%1\n" 1
+reg: BORU(reg,mrc1) "?mov %c,%0\nor %c,%1\n" 1
+reg: BXORU(reg,mrc1) "?mov %c,%0\nxor %c,%1\n" 1
+stmt: ASGNI(addr,ADDI(mem,con1)) "inc %1\n" memop(a)
+stmt: ASGNI(addr,ADDU(mem,con1)) "inc %1\n" memop(a)
+stmt: ASGNP(addr,ADDP(mem,con1)) "inc %1\n" memop(a)
+stmt: ASGNI(addr,SUBI(mem,con1)) "dec %1\n" memop(a)
+stmt: ASGNI(addr,SUBU(mem,con1)) "dec %1\n" memop(a)
+stmt: ASGNP(addr,SUBP(mem,con1)) "dec %1\n" memop(a)
+stmt: ASGNI(addr,ADDI(mem,rc)) "add %1,%2\n" memop(a)
+stmt: ASGNI(addr,ADDU(mem,rc)) "add %1,%2\n" memop(a)
+stmt: ASGNI(addr,SUBI(mem,rc)) "sub %1,%2\n" memop(a)
+stmt: ASGNI(addr,SUBU(mem,rc)) "sub %1,%2\n" memop(a)
+
+stmt: ASGNI(addr,BANDU(mem,rc)) "and %1,%2\n" memop(a)
+stmt: ASGNI(addr,BORU(mem,rc)) "or %1,%2\n" memop(a)
+stmt: ASGNI(addr,BXORU(mem,rc)) "xor %1,%2\n" memop(a)
+reg: BCOMU(reg) "?mov %c,%0\nnot %c\n" 2
+reg: NEGI(reg) "?mov %c,%0\nneg %c\n" 2
+
+stmt: ASGNI(addr,BCOMU(mem)) "not %1\n" memop(a)
+stmt: ASGNI(addr,NEGI(mem)) "neg %1\n" memop(a)
+reg: LSHI(reg,rc5) "?mov %c,%0\nsal %c,%1\n" 2
+reg: LSHU(reg,rc5) "?mov %c,%0\nshl %c,%1\n" 2
+reg: RSHI(reg,rc5) "?mov %c,%0\nsar %c,%1\n" 2
+reg: RSHU(reg,rc5) "?mov %c,%0\nshr %c,%1\n" 2
+
+stmt: ASGNI(addr,LSHI(mem,rc5)) "sal %1,%2\n" memop(a)
+stmt: ASGNI(addr,LSHU(mem,rc5)) "shl %1,%2\n" memop(a)
+stmt: ASGNI(addr,RSHI(mem,rc5)) "sar %1,%2\n" memop(a)
+stmt: ASGNI(addr,RSHU(mem,rc5)) "shr %1,%2\n" memop(a)
+
+rc5: CNSTI "%a" range(a, 0, 31)
+rc5: reg "cl"
+reg: MULI(reg,mrc3) "?mov %c,%0\nimul %c,%1\n" 14
+reg: MULI(con,mr) "imul %c,%1,%0\n" 13
+reg: MULU(reg,mr) "mul %1\n" 13
+reg: DIVU(reg,reg) "xor edx,edx\ndiv %1\n"
+reg: MODU(reg,reg) "xor edx,edx\ndiv %1\n"
+reg: DIVI(reg,reg) "cdq\nidiv %1\n"
+reg: MODI(reg,reg) "cdq\nidiv %1\n"
+reg: CVIU(reg) "mov %c,%0\n" move(a)
+reg: CVPU(reg) "mov %c,%0\n" move(a)
+reg: CVUI(reg) "mov %c,%0\n" move(a)
+reg: CVUP(reg) "mov %c,%0\n" move(a)
+reg: CVCI(INDIRC(addr)) "movsx %c,byte %0\n" 3
+reg: CVCU(INDIRC(addr)) "movzx %c,byte %0\n" 3
+reg: CVSI(INDIRS(addr)) "movsx %c,word %0\n" 3
+reg: CVSU(INDIRS(addr)) "movzx %c,word %0\n" 3
+reg: CVCI(reg) "# extend\n" 3
+reg: CVCU(reg) "# extend\n" 3
+reg: CVSI(reg) "# extend\n" 3
+reg: CVSU(reg) "# extend\n" 3
+
+reg: CVIC(reg) "# truncate\n" 1
+reg: CVIS(reg) "# truncate\n" 1
+reg: CVUC(reg) "# truncate\n" 1
+reg: CVUS(reg) "# truncate\n" 1
+stmt: ASGNC(addr,rc) "mov byte %0,%1\n" 1
+stmt: ASGNI(addr,rc) "mov dword %0,%1\n" 1
+stmt: ASGNP(addr,rc) "mov dword %0,%1\n" 1
+stmt: ASGNS(addr,rc) "mov word %0,%1\n" 1
+stmt: ARGI(mrc3) "push dword %0\n" 1
+stmt: ARGP(mrc3) "push dword %0\n" 1
+stmt: ASGNB(reg,INDIRB(reg)) "mov ecx,%a\nrep movsb\n"
+stmt: ARGB(INDIRB(reg)) "sub esp,%a\nmov edi,esp\nmov ecx,%a\nrep movsb\n"
+
+memf: INDIRD(addr) "qword %0"
+memf: INDIRF(addr) "dword %0"
+memf: CVFD(INDIRF(addr)) "dword %0"
+reg: memf "fld %0\n" 3
+stmt: ASGND(addr,reg) "fstp qword %0\n" 7
+stmt: ASGNF(addr,reg) "fstp dword %0\n" 7
+stmt: ASGNF(addr,CVDF(reg)) "fstp dword %0\n" 7
+stmt: ARGD(reg) "sub esp,8\nfstp qword [esp]\n"
+stmt: ARGF(reg) "sub esp,4\nfstp dword [esp]\n"
+reg: NEGD(reg) "fchs\n"
+reg: NEGF(reg) "fchs\n"
+reg: ADDD(reg,memf) "fadd %1\n"
+reg: ADDD(reg,reg) "faddp st1\n"
+reg: ADDF(reg,memf) "fadd %1\n"
+reg: ADDF(reg,reg) "faddp st1\n"
+reg: DIVD(reg,memf) "fdiv %1\n"
+reg: DIVD(reg,reg) "fdivrp st1\n"
+reg: DIVF(reg,memf) "fdiv %1\n"
+reg: DIVF(reg,reg) "fdivrp st1\n"
+reg: MULD(reg,memf) "fmul %1\n"
+reg: MULD(reg,reg) "fmulp st1\n"
+reg: MULF(reg,memf) "fmul %1\n"
+reg: MULF(reg,reg) "fmulp st1\n"
+reg: SUBD(reg,memf) "fsub %1\n"
+reg: SUBD(reg,reg) "fsubrp st1\n"
+reg: SUBF(reg,memf) "fsub %1\n"
+reg: SUBF(reg,reg) "fsubrp st1\n"
+reg: CVFD(reg) "# CVFD\n"
+reg: CVDF(reg) "sub esp,4\nfstp dword [esp]\nfld dword [esp]\nadd esp,4\n" 12
+
+stmt: ASGNI(addr,CVDI(reg)) "fistp dword %0\n" 29
+reg: CVDI(reg) "sub esp,4\nfistp dword [esp]\npop %c\n" 31
+
+reg: CVID(INDIRI(addr)) "fild dword %0\n" 10
+reg: CVID(reg) "push %0\nfild dword [esp]\nadd esp,4\n" 12
+
+addrj: ADDRGP "%a"
+addrj: reg "%0" 2
+addrj: mem "%0" 2
+
+stmt: JUMPV(addrj) "jmp %0\n" 3
+stmt: LABELV "%a:\n"
+stmt: EQI(mem,rc) "cmp %0,%1\nje near %a\n" 5
+stmt: GEI(mem,rc) "cmp %0,%1\njge near %a\n" 5
+stmt: GTI(mem,rc) "cmp %0,%1\njg near %a\n" 5
+stmt: LEI(mem,rc) "cmp %0,%1\njle near %a\n" 5
+stmt: LTI(mem,rc) "cmp %0,%1\njl near %a\n" 5
+stmt: NEI(mem,rc) "cmp %0,%1\njne near %a\n" 5
+stmt: GEU(mem,rc) "cmp %0,%1\njae near %a\n" 5
+stmt: GTU(mem,rc) "cmp %0,%1\nja near %a\n" 5
+stmt: LEU(mem,rc) "cmp %0,%1\njbe near %a\n" 5
+stmt: LTU(mem,rc) "cmp %0,%1\njb near %a\n" 5
+stmt: EQI(reg,mrc1) "cmp %0,%1\nje near %a\n" 4
+stmt: GEI(reg,mrc1) "cmp %0,%1\njge near %a\n" 4
+stmt: GTI(reg,mrc1) "cmp %0,%1\njg near %a\n" 4
+stmt: LEI(reg,mrc1) "cmp %0,%1\njle near %a\n" 4
+stmt: LTI(reg,mrc1) "cmp %0,%1\njl near %a\n" 4
+stmt: NEI(reg,mrc1) "cmp %0,%1\njne near %a\n" 4
+
+stmt: GEU(reg,mrc1) "cmp %0,%1\njae near %a\n" 4
+stmt: GTU(reg,mrc1) "cmp %0,%1\nja near %a\n" 4
+stmt: LEU(reg,mrc1) "cmp %0,%1\njbe near %a\n" 4
+stmt: LTU(reg,mrc1) "cmp %0,%1\njb near %a\n" 4
+cmpf: memf " %0"
+cmpf: reg "p"
+stmt: EQD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nje near %a\n"
+stmt: GED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njbe near %a\n"
+stmt: GTD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njb near %a\n"
+stmt: LED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njae near %a\n"
+stmt: LTD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nja near %a\n"
+stmt: NED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njne near %a\n"
+
+stmt: EQF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nje near %a\n"
+stmt: GEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njbe near %a\n"
+stmt: GTF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njb near %a\n"
+stmt: LEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njae near %a\n"
+stmt: LTF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nja near %a\n"
+stmt: NEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njne near %a\n"
+reg: CALLI(addrj) "call %0\nadd esp,%a\n"
+stmt: CALLV(addrj) "call %0\nadd esp,%a\n"
+reg: CALLF(addrj) "call %0\nadd esp,%a\n"
+reg: CALLD(addrj) "call %0\nadd esp,%a\n"
+
+stmt: RETI(reg) "# ret\n"
+stmt: RETF(reg) "# ret\n"
+stmt: RETD(reg) "# ret\n"
+%%
+/*
+ * Start-of-program hook: works out whether host and target byte
+ * orders differ, parses the command-line flags, and creates the
+ * register symbols and register masks used by the rest of this file.
+ */
+static void progbeg(argc, argv) int argc; char *argv[]; {
+ int i;
+
+ {
+ /* u.i reads back as 1 only if the char overlays the low-order byte */
+ union {
+ char c;
+ int i;
+ } u;
+ u.i = 0;
+ u.c = 1;
+ swap = (u.i == 1) != IR->little_endian;
+ }
+ parseflags(argc, argv);
+ /* 32-bit, 16-bit and 8-bit names for the same register numbers */
+ intreg[EAX] = mkreg("eax", EAX, 1, IREG);
+ intreg[EDX] = mkreg("edx", EDX, 1, IREG);
+ intreg[ECX] = mkreg("ecx", ECX, 1, IREG);
+ intreg[EBX] = mkreg("ebx", EBX, 1, IREG);
+ intreg[ESI] = mkreg("esi", ESI, 1, IREG);
+ intreg[EDI] = mkreg("edi", EDI, 1, IREG);
+ shortreg[EAX] = mkreg("ax", EAX, 1, IREG);
+ shortreg[ECX] = mkreg("cx", ECX, 1, IREG);
+ shortreg[EDX] = mkreg("dx", EDX, 1, IREG);
+ shortreg[EBX] = mkreg("bx", EBX, 1, IREG);
+ shortreg[ESI] = mkreg("si", ESI, 1, IREG);
+ shortreg[EDI] = mkreg("di", EDI, 1, IREG);
+
+ /* only EAX, ECX, EDX and EBX are given byte-sized names */
+ charreg[EAX] = mkreg("al", EAX, 1, IREG);
+ charreg[ECX] = mkreg("cl", ECX, 1, IREG);
+ charreg[EDX] = mkreg("dl", EDX, 1, IREG);
+ charreg[EBX] = mkreg("bl", EBX, 1, IREG);
+ for (i = 0; i < 8; i++)
+ fltreg[i] = mkreg("%d", i, 0, FREG);
+ rmap[C] = mkwildcard(charreg);
+ rmap[S] = mkwildcard(shortreg);
+ rmap[P] = rmap[B] = rmap[U] = rmap[I] = mkwildcard(intreg);
+ rmap[F] = rmap[D] = mkwildcard(fltreg);
+ tmask[IREG] = (1<<EDI) | (1<<ESI) | (1<<EBX)
+ | (1<<EDX) | (1<<ECX) | (1<<EAX);
+ vmask[IREG] = 0;
+ tmask[FREG] = 0xff;
+ vmask[FREG] = 0;
+ cseg = 0;
+ /* quo and rem name EAX and EDX respectively, but each occupies both */
+ quo = mkreg("eax", EAX, 1, IREG);
+ quo->x.regnode->mask |= 1<<EDX;
+ rem = mkreg("edx", EDX, 1, IREG);
+ rem->x.regnode->mask |= 1<<EAX;
+}
+/*
+ * Switch output to segment `n', emitting the matching NASM section
+ * directive. Nothing is printed if `n' is already the current
+ * segment. DATA and LIT both go to .data.
+ */
+static void segment(n) int n; {
+    if (n == cseg)
+        return;
+    cseg = n;
+
+    if (n == CODE) {
+        print("[section .text]\n");
+        return;
+    }
+    if (n == DATA || n == LIT) {
+        print("[section .data]\n");
+        return;
+    }
+    if (n == BSS)
+        print("[section .bss]\n");
+}
+/* End-of-program hook: this back end emits nothing here. */
+static void progend() {
+
+}
+/*
+ * Pin operands and results of node `p' to the specific registers that
+ * the instruction templates for its operator rely on.
+ */
+static void target(p) Node p; {
+ assert(p);
+ switch (p->op) {
+ case RSHI: case RSHU: case LSHI: case LSHU:
+ /*
+ * A shift count that is neither a constant nor a register
+ * variable holding a known constant goes in ECX (the templates
+ * print it as "cl").
+ */
+ if (generic(p->kids[1]->op) != CNST
+ && !( generic(p->kids[1]->op) == INDIR
+ && p->kids[1]->kids[0]->op == VREG+P
+ && p->kids[1]->syms[RX]->u.t.cse
+ && generic(p->kids[1]->syms[RX]->u.t.cse->op) == CNST
+)) {
+ rtarget(p, 1, intreg[ECX]);
+ setreg(p, intreg[EAX]);
+ }
+ break;
+ case MULU:
+ /* result is `quo' (EAX, also occupying EDX); left operand in EAX */
+ setreg(p, quo);
+ rtarget(p, 0, intreg[EAX]);
+ break;
+ case DIVI: case DIVU:
+ /* dividend in EAX, divisor in ECX, result taken from `quo' */
+ setreg(p, quo);
+ rtarget(p, 0, intreg[EAX]);
+ rtarget(p, 1, intreg[ECX]);
+ break;
+ case MODI: case MODU:
+ /* same operands as division, result taken from `rem' */
+ setreg(p, rem);
+ rtarget(p, 0, intreg[EAX]);
+ rtarget(p, 1, intreg[ECX]);
+ break;
+ case ASGNB:
+ /* block copy uses "rep movsb": destination EDI, source ESI */
+ rtarget(p, 0, intreg[EDI]);
+ rtarget(p->kids[1], 0, intreg[ESI]);
+ break;
+ case ARGB:
+ /* the template sets EDI from ESP itself; only the source is pinned */
+ rtarget(p->kids[0], 0, intreg[ESI]);
+ break;
+ case CALLI: case CALLV:
+ setreg(p, intreg[EAX]);
+ break;
+ case RETI:
+ rtarget(p, 0, intreg[EAX]);
+ break;
+ }
+}
+
+/*
+ * Spill the integer registers that the code emitted for node `p'
+ * overwrites, and keep a running floating-point stack depth (via
+ * ckstack) in the static `nstack'.
+ */
+static void clobber(p) Node p; {
+ static int nstack = 0;
+
+ assert(p);
+ nstack = ckstack(p, nstack);
+ assert(p->count > 0 || nstack == 0);
+ switch (p->op) {
+ case ASGNB: case ARGB:
+ /* the block-copy templates use ECX, ESI and EDI */
+ spill(1<<ECX | 1<<ESI | 1<<EDI, IREG, p);
+ break;
+ case EQD: case LED: case GED: case LTD: case GTD: case NED:
+ case EQF: case LEF: case GEF: case LTF: case GTF: case NEF:
+ /* the float-compare templates do "fstsw ax" */
+ spill(1<<EAX, IREG, p);
+ break;
+ case CALLD: case CALLF:
+ spill(1<<EDX | 1<<EAX, IREG, p);
+ break;
+ }
+}
+#define isfp(p) (optype((p)->op)==F || optype((p)->op)==D)
+
+/*
+ * Update the floating-point stack depth `n' for node `p': each
+ * floating-point operand takes one entry off, and a floating-point
+ * result that is used (count > 0) puts one on. Reports an error if
+ * the depth exceeds 8 (presumably the x87 register stack size - the
+ * same count of float registers is created in progbeg). Returns the
+ * new depth.
+ */
+static int ckstack(p, n) Node p; int n; {
+ int i;
+
+ for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++)
+ if (isfp(p->x.kids[i]))
+ n--;
+ if (isfp(p) && p->count > 0)
+ n++;
+ if (n > 8)
+ error("expression too complicated\n");
+ debug(fprint(2, "(ckstack(%x)=%d)\n", p, n));
+ assert(n >= 0);
+ return n;
+}
+/*
+ * Cost function for the read-modify-write templates: for an
+ * assignment whose right-hand side's first operand is a load from the
+ * very address being assigned to, return 3. Otherwise return
+ * LBURG_MAX.
+ */
+static int memop(p) Node p; {
+    Node operand;
+
+    assert(p);
+    assert(generic(p->op) == ASGN);
+    assert(p->kids[0]);
+    assert(p->kids[1]);
+
+    operand = p->kids[1]->kids[0];
+    if (generic(operand->op) != INDIR)
+        return LBURG_MAX;
+    if (!sametree(p->kids[0], operand->kids[0]))
+        return LBURG_MAX;
+    return 3;
+}
+/*
+ * Return 1 if trees `p' and `q' are structurally identical (same
+ * operator and same syms[0] at every node, recursively through
+ * kids[0] and kids[1]), else 0. Two null trees are identical.
+ */
+static int sametree(p, q) Node p, q; {
+    if (p == NULL || q == NULL)
+        return p == NULL && q == NULL;
+    if (p->op != q->op || p->syms[0] != q->syms[0])
+        return 0;
+    return sametree(p->kids[0], q->kids[0])
+        && sametree(p->kids[1], q->kids[1]);
+}
+/*
+ * Emit code for the register-to-register conversions, which need a
+ * differently sized name for the source register than the one the
+ * templates would print (presumably this is called for the templates
+ * that begin with "#", such as "# extend" and "# truncate").
+ */
+static void emit2(p) Node p; {
+/* name of the first operand's register, as spelled in register set f */
+#define preg(f) ((f)[getregnum(p->x.kids[0])]->x.name)
+
+ if (p->op == CVCI)
+ print("movsx %s,%s\n", p->syms[RX]->x.name
+, preg(charreg));
+ else if (p->op == CVCU)
+ print("movzx %s,%s\n", p->syms[RX]->x.name
+, preg(charreg));
+ else if (p->op == CVSI)
+ print("movsx %s,%s\n", p->syms[RX]->x.name
+, preg(shortreg));
+ else if (p->op == CVSU)
+ print("movzx %s,%s\n", p->syms[RX]->x.name
+, preg(shortreg));
+ else if (p->op == CVIC || p->op == CVIS
+ || p->op == CVUC || p->op == CVUS) {
+ /* truncation: a 16-bit move, skipped when both names are the same */
+ char *dst = shortreg[getregnum(p)]->x.name;
+ char *src = preg(shortreg);
+ if (dst != src)
+ print("mov %s,%s\n", dst, src);
+ }
+}
+
+/* Account for one outgoing argument: passes 4 and the value in syms[0] to mkactual. */
+static void doarg(p) Node p; {
+ assert(p && p->syms[0]);
+ mkactual(4, p->syms[0]->u.c.v.i);
+}
+/*
+ * Block-copy hooks. All three are deliberately empty here; block
+ * moves are emitted by the ASGNB/ARGB templates ("rep movsb")
+ * instead.
+ */
+static void blkfetch(k, off, reg, tmp)
+int k, off, reg, tmp; {}
+static void blkstore(k, off, reg, tmp)
+int k, off, reg, tmp; {}
+static void blkloop(dreg, doff, sreg, soff, size, tmps)
+int dreg, doff, sreg, soff, size, tmps[]; {}
+/*
+ * Place local variable `p': floating-point locals are forced to class
+ * AUTO first; anything askregvar does not put in a register is given
+ * a stack slot by mkauto.
+ */
+static void local(p) Symbol p; {
+ if (isfloat(p->type))
+ p->sclass = AUTO;
+ if (askregvar(p, rmap[ttob(p->type)]) == 0)
+ mkauto(p);
+}
+/*
+ * Emit a complete function: label, prologue saving EBX/ESI/EDI/EBP,
+ * frame setup, the generated body, and the matching epilogue.
+ */
+static void function(f, caller, callee, n)
+Symbol f, callee[], caller[]; int n; {
+ int i;
+
+ print("%s:\n", f->x.name);
+ print("push ebx\n");
+ print("push esi\n");
+ print("push edi\n");
+ print("push ebp\n");
+ print("mov ebp,esp\n");
+usedmask[0] = usedmask[1] = 0;
+freemask[0] = freemask[1] = ~(unsigned)0;
+ /*
+ * First parameter's offset from EBP: 16 bytes for the four registers
+ * pushed above, plus 4 (presumably the return address).
+ */
+ offset = 16 + 4;
+ for (i = 0; callee[i]; i++) {
+ Symbol p = callee[i];
+ Symbol q = caller[i];
+ assert(q);
+ p->x.offset = q->x.offset = offset;
+ p->x.name = q->x.name = stringf("%d", p->x.offset);
+ p->sclass = q->sclass = AUTO;
+ /* each parameter occupies a multiple of 4 bytes */
+ offset += roundup(q->type->size, 4);
+ }
+ assert(caller[i] == 0);
+ offset = maxoffset = 0;
+ gencode(caller, callee);
+ /* the frame size is only known once code generation has run */
+ framesize = roundup(maxoffset, 4);
+ if (framesize > 0)
+ print("sub esp,%d\n", framesize);
+ emitcode();
+ print("mov esp,ebp\n");
+ print("pop ebp\n");
+ print("pop edi\n");
+ print("pop esi\n");
+ print("pop ebx\n");
+ print("ret\n");
+}
+/*
+ * Choose the assembler-level name (p->x.name) for symbol `p':
+ *  - statics at LOCAL scope or deeper get a fresh "L<n>" label;
+ *  - generated symbols become "$L<name>";
+ *  - globals and externs become "$<name>", or "$_<name>" when the
+ *    conditional below is switched to `#if 1' for a.out;
+ *  - integer/pointer constants written as "0x..." are respelled in
+ *    the "0...H" form;
+ *  - anything else keeps its own name.
+ */
+static void defsymbol(p) Symbol p; {
+ if (p->scope >= LOCAL && p->sclass == STATIC)
+ p->x.name = stringf("L%d", genlabel(1));
+ else if (p->generated)
+ p->x.name = stringf("$L%s", p->name);
+ else if (p->scope == GLOBAL || p->sclass == EXTERN)
+ /* CHANGE THIS FOR a.out */
+#if 0
+ p->x.name = stringf("$_%s", p->name);
+#else
+ p->x.name = stringf("$%s", p->name);
+#endif
+ else if (p->scope == CONSTANTS
+ && (isint(p->type) || isptr(p->type))
+ && p->name[0] == '0' && p->name[1] == 'x')
+ p->x.name = stringf("0%sH", &p->name[2]);
+ else
+ p->x.name = p->name;
+}
+/*
+ * Make `q' denote the address `p' + `n'. For globals, statics and
+ * externs the result is a symbolic expression "name+n" (or "name-n"
+ * for negative n). For everything else it is a numeric offset.
+ */
+static void address(q, p, n) Symbol q, p; int n; {
+    int symbolic = p->scope == GLOBAL
+        || p->sclass == STATIC || p->sclass == EXTERN;
+
+    if (!symbolic) {
+        q->x.offset = p->x.offset + n;
+        q->x.name = stringd(q->x.offset);
+        return;
+    }
+    /* a negative n prints its own minus sign, so only add "+" otherwise */
+    q->x.name = stringf("%s%s%d",
+                        p->x.name, n >= 0 ? "+" : "", n);
+}
+/*
+ * Emit an initialised constant of type code `ty' with value `v' as a
+ * db/dw/dd directive. Floats are emitted as their raw bit pattern; a
+ * double is emitted as two dwords, ordered according to `swap'.
+ * An unknown type code trips the assertion.
+ */
+static void defconst(ty, v) int ty; Value v; {
+ switch (ty) {
+ case C: print("db %d\n", v.uc); return;
+ case S: print("dw %d\n", v.ss); return;
+ case I: print("dd %d\n", v.i ); return;
+ case U: print("dd 0%xH\n", v.u ); return;
+ case P: print("dd 0%xH\n", v.p ); return;
+ case F:
+ print("dd 0%xH\n", *(unsigned *)&v.f);
+ return;
+ case D: {
+ /* p[swap] is printed first, then the other half */
+ unsigned *p = (unsigned *)&v.d;
+ print("dd 0%xH,0%xH\n", p[swap], p[1 - swap]);
+ return;
+ }
+ }
+ assert(0);
+}
+/* Emit a dword initialised with the address of symbol `p'. */
+static void defaddress(p) Symbol p; {
+ print("dd %s\n", p->x.name);
+}
+/*
+ * Emit the `n' bytes at `str' as a single db directive. Runs of
+ * 7-bit characters from space upwards (other than the single quote)
+ * go inside '...' quotes; every other byte is written as a decimal
+ * number. `inquote' tracks whether a quoted run is currently open.
+ * The line always starts with an opening quote, so data beginning
+ * with an unquotable byte yields an empty '' first.
+ */
+static void defstring(n, str) int n; char *str; {
+ char *s;
+ int inquote = 1;
+
+ print("db '");
+
+ for (s = str; s < str + n; s++)
+ {
+ if ((*s & 0x7F) == *s && *s >= ' ' && *s != '\'') {
+ /* quotable character: reopen the quotes if needed */
+ if (!inquote){
+ print(", '");
+ inquote = 1;
+ }
+ print("%c",*s);
+ }
+ else
+ {
+ /* numeric byte: close an open quoted run, or just separate */
+ if (inquote){
+ print("', ");
+ inquote = 0;
+ }
+ else
+ print(", ");
+ print("%d",*s);
+ }
+ }
+ if (inquote) print("'");
+ print("\n");
+}
+/* Emit a [global] directive for symbol `p'. */
+static void export(p) Symbol p; {
+ print("[global %s]\n", p->x.name);
+}
+/* Emit an [extern] directive for symbol `p', but only if it is referenced. */
+static void import(p) Symbol p; {
+ if (p->ref > 0) {
+ print("[extern %s]\n", p->x.name);
+ }
+}
+/*
+ * Emit the definition point of global symbol `p': padding to align
+ * it, its label, and, for BSS symbols, the space reservation itself.
+ *
+ * Alignment is capped at 4 bytes. The padding needed to reach the
+ * next boundary is (-offset) & (align-1), where the offset into the
+ * section is ($-$$), so the count is written as ($$-$) & mask. Using
+ * ($-$$) & mask would pad by the offset's remainder and leave odd
+ * offsets misaligned (e.g. offset 1, align 4 would pad to 2).
+ */
+static void global(p) Symbol p; {
+    int mask = p->type->align > 4 ? 3 : p->type->align-1;
+
+    if (p->u.seg == BSS)
+        print("resb ($$-$) & %d\n", mask);
+    else
+        print("times ($$-$) & %d nop\n", mask);
+    print("%s:\n", p->x.name);
+    if (p->u.seg == BSS)
+        print("resb %d\n", p->type->size);
+}
+/*
+ * Emit `n' zero bytes. Nothing is emitted while the current segment
+ * is BSS.
+ */
+static void space(n) int n; {
+    if (cseg == BSS)
+        return;
+    print("times %d db 0\n", n);
+}
+Interface x86nasmIR = {
+ 1, 1, 0, /* char */
+ 2, 2, 0, /* short */
+ 4, 4, 0, /* int */
+ 4, 4, 1, /* float */
+ 8, 4, 1, /* double */
+ 4, 4, 0, /* T * */
+ 0, 4, 0, /* struct; so that ARGB keeps stack aligned */
+ 1, /* little_endian */
+ 0, /* mulops_calls */
+ 0, /* wants_callb */
+ 1, /* wants_argb */
+ 0, /* left_to_right */
+ 0, /* wants_dag */
+ address,
+ blockbeg,
+ blockend,
+ defaddress,
+ defconst,
+ defstring,
+ defsymbol,
+ emit,
+ export,
+ function,
+ gen,
+ global,
+ import,
+ local,
+ progbeg,
+ progend,
+ segment,
+ space,
+ 0, 0, 0, 0, 0, 0, 0,
+ {1, blkfetch, blkstore, blkloop,
+ _label,
+ _rule,
+ _nts,
+ _kids,
+ _opname,
+ _arity,
+ _string,
+ _templates,
+ _isinstruction,
+ _ntname,
+ emit2,
+ doarg,
+ target,
+ clobber,
+}
+};
diff --git a/misc/magic b/misc/magic
new file mode 100644
index 0000000..0172f4a
--- /dev/null
+++ b/misc/magic
@@ -0,0 +1,6 @@
+# Put the following lines in your /etc/magic file to get 'file' to recognise
+# RDOFF Object Files
+
+0 string RDOFF RDOFF Object File
+>5 byte >32 version %c (little endian)
+>5 byte <32 version %d (big endian)
diff --git a/misc/nasm.sl b/misc/nasm.sl
new file mode 100644
index 0000000..be4d30b
--- /dev/null
+++ b/misc/nasm.sl
@@ -0,0 +1,305 @@
+% This file defines a NASM editor mode for the JED editor.
+% JED's home page is http://space.mit.edu/~davis/jed.html.
+%
+% To install, copy this file into your JED_LIBRARY directory
+% (/usr/local/jed/lib or C:\JED\LIB or whatever), then add the
+% following lines to your .jedrc or jed.rc file:
+% autoload("nasm_mode", "nasm");
+% add_mode_for_extension("nasm", "asm");
+% (you can of course replace "asm" with whatever file extension
+% you like to use for your NASM source files).
+
+% User-tunable layout settings.
+% Nasm_Instruction_Indent: amount of whitespace nasm_indent_line puts
+%   in front of a line that starts with a keyword; a label is padded
+%   out to this width.
+% Nasm_Comment_Column: column nasm_insert_comment tries to place a
+%   trailing comment at.
+% Nasm_Comment_Space: minimum amount of whitespace nasm_insert_comment
+%   leaves before a trailing comment.
+variable Nasm_Instruction_Indent = 10;
+variable Nasm_Comment_Column = 33;
+variable Nasm_Comment_Space = 1;
+
+% Keyword tables.  nasm_kw_N is the concatenation, with no separators,
+% of every keyword that is exactly N characters long.  nasm_is_kw
+% binary-searches these strings in N-character steps, so the keywords
+% inside each table must stay in ascending strcmp order and every
+% piece must be a whole number of keywords long.  The same tables are
+% passed to define_keywords_n further down for syntax highlighting.
+% The trailing integer given to strncat equals the number of string
+% pieces listed before it; keep the two in step when adding a piece.
+variable nasm_kw_2 = strcat("ahalaxbhblbpbtbxchclcscxdbdddhdidldqdsdtdwdxes",
+ "fsgsinjajbjcjejgjljojpjsjzorsispssto");
+variable nasm_kw_3 = strncat("a16a32aaaaadaamaasadcaddandbsfbsrbtcbtrbtscbw",
+ "cdqclccldclicmccmpcr0cr2cr3cr4cwddaadasdecdiv",
+ "dr0dr1dr2dr3dr6dr7eaxebpebxecxediedxequesiesp",
+ "farfldfsthltincintjaejbejgejlejmpjnajnbjncjne",
+ "jngjnljnojnpjnsjnzjpejpolarldslealeslfslgslsl",
+ "lssltrmm0mm1mm2mm3mm4mm5mm6mm7movmulnegnopnot",
+ "o16o32outpopporrclrcrrepretrolrorrsmsalsarsbb",
+ "segshlshrst0st1st2st3st4st5st6st7stcstdstistr",
+ "subtr3tr4tr5tr6tr7wrtxor", 9);
+variable nasm_kw_4 = strncat("arplbytecallcltscwdeemmsfabsfaddfbldfchsfcom",
+ "fcosfdivfenifildfistfld1fldzfmulfnopfsinfstp",
+ "fsubftstfxamfxchidivimulinsbinsdinswint3into",
+ "invdiretjcxzjnaejnbejngejnlelahflgdtlidtlldt",
+ "lmswlocklongloopmovdmovqnearpandpopapopfpush",
+ "pxorreperepzresbresdreswretfretnsahfsetasetb",
+ "setcsetesetgsetlsetosetpsetssetzsgdtshldshrd",
+ "sidtsldtsmswtestverrverwwaitwordxaddxchg", 8);
+variable nasm_kw_5 = strncat("boundbswapcmpsbcmpsdcmpswcpuiddwordenterf2xm1",
+ "faddpfbstpfclexfcompfdisifdivpfdivrffreefiadd",
+ "ficomfidivfimulfinitfistpfisubfldcwfldpifmulp",
+ "fpremfptanfsavefsqrtfstcwfstswfsubpfsubrfucom",
+ "fyl2xiretdiretwjecxzleavelodsblodsdlodswloope",
+ "loopzmovsbmovsdmovswmovsxmovzxoutsboutsdoutsw",
+ "paddbpadddpaddwpandnpopadpopawpopfdpopfwpslld",
+ "psllqpsllwpsradpsrawpsrldpsrlqpsrlwpsubbpsubd",
+ "psubwpushapushfqwordrdmsrrdtscrepnerepnzscasb",
+ "scasdscaswsetaesetbesetgesetlesetnasetnbsetnc",
+ "setnesetngsetnlsetnosetnpsetnssetnzsetpesetpo",
+ "shortstosbstosdstoswtimestwordwrmsrxlatb", 12);
+variable nasm_kw_6 = strncat("fcomppfdivrpficompfidivrfisubrfldenvfldl2e",
+ "fldl2tfldlg2fldln2fpatanfprem1frstorfscale",
+ "fsetpmfstenvfsubrpfucompinvlpgloopneloopnz",
+ "paddsbpaddswpmulhwpmullwpsubsbpsubswpushad",
+ "pushawpushfdpushfwsetnaesetnbesetngesetnle",
+ "wbinvd", 6);
+variable nasm_kw_7 = strncat("cmpxchgfdecstpfincstpfrndintfsincosfucompp",
+ "fxtractfyl2xp1paddusbpadduswpcmpeqbpcmpeqd",
+ "pcmpeqwpcmpgtbpcmpgtdpcmpgtwpmaddwdpsubusb",
+ "psubusw", 4);
+variable nasm_kw_8 = "packssdwpacksswbpackuswb";
+variable nasm_kw_9 = strcat("cmpxchg8bpunpckhbwpunpckhdqpunpckhwdpunpcklbw",
+ "punpckldqpunpcklwd");
+
+% nasm_is_kw: test whether a word is a NASM keyword.
+% Takes one string from the stack and returns 1 if, after folding to
+% lower case, it appears in the nasm_kw_N table for its length, and 0
+% otherwise.  An empty string is reported as a keyword (returns 1);
+% lengths that have no table (1, or more than 9) return 0.
+define nasm_is_kw {
+ variable word;
+ variable len;
+ variable list, min, max, pos, cmp;
+
+ % The argument is taken straight off the stack.
+ word = strlow(());
+ len = strlen(word);
+
+ % Select the table that holds keywords of this length.
+ switch (len)
+ { case 0: return 1; }
+ { case 2: list = nasm_kw_2; }
+ { case 3: list = nasm_kw_3; }
+ { case 4: list = nasm_kw_4; }
+ { case 5: list = nasm_kw_5; }
+ { case 6: list = nasm_kw_6; }
+ { case 7: list = nasm_kw_7; }
+ { case 8: list = nasm_kw_8; }
+ { case 9: list = nasm_kw_9; }
+ % Default block.  NOTE(review): the pop() presumably discards the
+ % switch value that is still on the stack when no case matched --
+ % confirm against the S-Lang version in use.
+ { pop(); return 0; }
+
+ % Binary search over the table in len-character steps.  min and max
+ % are exclusive bounds on the index of the entry being looked for;
+ % the loop stops when no index is left between them.
+ min = -1;
+ max = strlen(list) / len;
+ while (max - min >= 2) {
+ pos = (max + min) / 2;
+ % Entry pos starts at offset pos*len; the +1 is there because the
+ % start position handed to substr is counted from 1.
+ cmp = strcmp(word, substr(list, pos * len + 1, len));
+ if (cmp == 0)
+ return 1; % it's a keyword
+ else if (cmp < 0)
+ max = pos; % bottom half
+ else if (cmp > 0)
+ min = pos; % top half
+ }
+ return 0;
+}
+
+% nasm_indent_line: re-indent the current line (installed as the
+% buffer's indent_hook by nasm_mode).
+%  - A line whose first non-blank character is ';', '#' or '[' is
+%    moved flush left.
+%  - A line whose first word is a keyword (see nasm_is_kw; an empty
+%    first word counts) is indented by Nasm_Instruction_Indent.
+%  - Otherwise the first word is treated as a label: it is moved flush
+%    left and whatever follows it is pushed out to the instruction
+%    column, or separated by a single space if the label is too long.
+define nasm_indent_line() {
+ variable word, len, e;
+
+ % Remember whether the point started out at the end of the line.
+ e = eolp();
+
+ % Save the editing position.  The exit block restores it on the way
+ % out and, if that leaves the point inside the indentation area,
+ % moves it forward over the whitespace.
+ push_spot();
+ EXIT_BLOCK {
+ pop_spot();
+ if (what_column() <= Nasm_Instruction_Indent)
+ skip_white();
+ }
+
+ bol_skip_white();
+
+ % Comment, preprocessor and directive lines go to the left margin.
+ if (orelse
+ {looking_at_char(';')}
+ {looking_at_char('#')}
+ {looking_at_char('[')}) {
+ bol_trim();
+ pop_spot();
+ % The spot has just been popped by hand, so the exit block is
+ % replaced by an empty one before returning.
+ EXIT_BLOCK {
+ }
+ return;
+ }
+
+ % Pick up the first word on the line.
+ push_mark();
+ skip_chars("0-9a-zA-Z_.");
+ word = bufsubstr();
+
+ if (nasm_is_kw(word)) {
+ % Instruction (or blank) line: indent to the instruction column.
+ bol_trim();
+ whitespace(Nasm_Instruction_Indent);
+ } else {
+ % Label line: strip the leading whitespace but stay just after the
+ % label.
+ push_spot();
+ bol_trim();
+ pop_spot();
+ len = strlen(word);
+ % A ':' directly after the label is counted as part of it.
+ if (looking_at_char(':')) {
+ go_right_1();
+ len++;
+ }
+ trim();
+ % Only insert padding if something follows the label, or if the
+ % user was typing at the end of the line.
+ if (e or not(eolp())) {
+ if (len >= Nasm_Instruction_Indent) {
+ % NOTE(review): it is not evident from this file what value this
+ % pop() removes from the stack -- verify before touching it.
+ pop();
+ whitespace(1);
+ } else
+ whitespace(Nasm_Instruction_Indent - len);
+ if (e) {
+ % Replace the saved spot with the new end of line, so that the
+ % exit block leaves the point after the inserted padding.
+ pop_spot();
+ eol();
+ push_spot();
+ }
+ }
+ }
+}
+
+% nasm_newline_indent: newline handler (installed as the buffer's
+% newline_indent_hook by nasm_mode).  If the current line holds only
+% whitespace it is trimmed first; then a newline is inserted and the
+% new line is indented with nasm_indent_line.
+define nasm_newline_indent {
+ push_spot();
+ bol_skip_white();
+ % Only whitespace on this line: remove it.
+ if (eolp())
+ trim();
+ pop_spot();
+ newline();
+ nasm_indent_line();
+}
+
+% nasm_bol_self_ins: bound to ';', '#' and '['.  Inserts the typed
+% character and, if it was typed with nothing but whitespace before
+% it on the line, re-indents the line.
+define nasm_bol_self_ins {
+ push_spot();
+ bskip_white();
+ % The result of bolp() is deliberately left on the stack; it is
+ % consumed by the `if (())' at the end of this function.
+ bolp();
+ pop_spot();
+
+ call("self_insert_cmd");
+
+ % Grotty: force immediate update of the syntax highlighting.
+ insert_char('.');
+ deln(left(1));
+
+ % Pops the bolp() result saved above.
+ if (())
+ nasm_indent_line();
+}
+
+% nasm_self_ins_ind: bound to ':'.  Inserts the typed character and
+% then always re-indents the current line.
+define nasm_self_ins_ind {
+ call("self_insert_cmd");
+
+ % Grotty: force immediate update of the syntax highlighting.
+ insert_char('.');
+ deln(left(1));
+
+ nasm_indent_line();
+}
+
+% nasm_insert_comment: bound to ESC ';'.  Moves to the comment on the
+% current line, creating one if there is none.
+%  - Comment-only line: the comment is moved flush left and the point
+%    is placed after the ';' and any whitespace following it.
+%  - Blank line: "; " is inserted at the left margin.
+%  - Otherwise the first ';' outside a quoted string is located (or
+%    the end of the line if there is none), the whitespace in front
+%    of it is replaced so that the comment starts at
+%    Nasm_Comment_Column where possible, and "; " is inserted if the
+%    line had no comment.
+define nasm_insert_comment {
+ variable spc;
+
+ bol_skip_white();
+ if (looking_at_char(';')) {
+ bol_trim();
+ go_right(1);
+ skip_white();
+ return;
+ } else if (eolp()) {
+ bol_trim();
+ insert("; ");
+ return;
+ }
+
+ % Scan forward for a ';' that is not inside '...' or "...".
+ forever {
+ skip_chars("^;\n'\"");
+ if (looking_at_char('\'')) {
+ % Skip a single-quoted string; an unterminated one runs to the
+ % end of the line.
+ go_right_1();
+ skip_chars("^'\n");
+ !if (eolp())
+ go_right_1();
+ } else if (looking_at_char('\"')) {
+ % Same for a double-quoted string.
+ go_right_1();
+ skip_chars("^\"\n");
+ !if (eolp())
+ go_right_1();
+ } else if (looking_at_char(';')) {
+ % Existing comment: delete the whitespace in front of it and end
+ % up positioned on the ';'.
+ !if (bolp()) {
+ go_left_1();
+ trim();
+ !if (looking_at_char(';'))
+ go_right_1();
+ }
+ break;
+ } else {
+ % End of line reached without finding a comment.
+ break;
+ }
+ }
+ % Pad out to the comment column, but never by less than
+ % Nasm_Comment_Space.
+ spc = Nasm_Comment_Column - what_column();
+ if (spc < Nasm_Comment_Space)
+ spc = Nasm_Comment_Space;
+ whitespace(spc);
+ if (eolp()) {
+ insert("; ");
+ } else {
+ % Step over the existing ';' and the whitespace after it.
+ go_right_1();
+ skip_white();
+ }
+}
+
+% Create the "NASM" syntax table and describe the character classes
+% used by the highlighter.  $1 holds the table name for the rest of
+% the file.
+$1 = "NASM";
+create_syntax_table($1);
+
+% ';' starts a comment that runs to the end of the line.
+define_syntax (";", "", '%', $1);
+define_syntax ("([", ")]", '(', $1);
+define_syntax ('"', '"', $1);
+define_syntax ('\'', '\'', $1);
+define_syntax ("0-9a-zA-Z_.@#", 'w', $1);
+define_syntax ("-+0-9a-fA-F.xXL", '0', $1);
+define_syntax (",:", ',', $1);
+define_syntax ('#', '#', $1);
+define_syntax ("|^&<>+-*/%~", '+', $1);
+
+% NOTE(review): flag value 1 presumably selects case-insensitive
+% keyword matching -- confirm against the JED documentation.
+set_syntax_flags($1,1);
+
+% Regular-expression highlighting rules; only compiled in when the
+% editor defines HAS_DFA_SYNTAX.
+#ifdef HAS_DFA_SYNTAX
+
+enable_highlight_cache("nasm.dfa", $1);
+% Comments.
+define_highlight_rule(";.*$", "comment", $1);
+% Identifiers; the "K" prefix on the colour name presumably asks for
+% a keyword-table lookup (TODO confirm).
+define_highlight_rule("[A-Za-z_\\.\\?][A-Za-z0-9_\\.\\?\\$#@~]*",
+ "Knormal", $1);
+% '$' on its own, or '$' followed by an identifier.
+define_highlight_rule("$([A-Za-z_\\.\\?][A-Za-z0-9_\\.\\?\\$#@~]*)?",
+ "normal", $1);
+% Numbers: decimal/float, Q/B suffixed, 0x or $ prefixed hex, H
+% suffixed hex.
+define_highlight_rule("[0-9]+(\\.[0-9]*)?([Ee][\\+\\-]?[0-9]*)?",
+ "number", $1);
+define_highlight_rule("[0-9]+[QqBb]", "number", $1);
+define_highlight_rule("(0x|\\$[0-9A-Fa-f])[0-9A-Fa-f]*", "number", $1);
+define_highlight_rule("[0-9A-Fa-f]+[Hh]", "number", $1);
+% Strings in either quote style; the "$" variants cover a string
+% left unterminated at the end of the line.
+define_highlight_rule("\"[^\"]*\"", "string", $1);
+define_highlight_rule("\"[^\"]*$", "string", $1);
+define_highlight_rule("'[^']*'", "string", $1);
+define_highlight_rule("'[^']*$", "string", $1);
+define_highlight_rule("[\\(\\)\\[\\],:]*", "delimiter", $1);
+define_highlight_rule("[\\|\\^&<>\\+\\-\\*/%~]*", "operator", $1);
+% A '#' preceded only by whitespace starts a preprocessor line.
+define_highlight_rule("^[ \t]*#", "PQpreprocess", $1);
+define_highlight_rule("@[0-9A-Za-z_\\.]*", "keyword1", $1);
+% Whitespace and anything not matched above.
+define_highlight_rule("[ \t]*", "normal", $1);
+define_highlight_rule(".", "normal", $1);
+build_highlight_table($1);
+
+#endif
+
+% Register each keyword table with the syntax table; the third
+% argument is the length of the keywords held in that table.
+define_keywords_n($1, nasm_kw_2, 2, 0);
+define_keywords_n($1, nasm_kw_3, 3, 0);
+define_keywords_n($1, nasm_kw_4, 4, 0);
+define_keywords_n($1, nasm_kw_5, 5, 0);
+define_keywords_n($1, nasm_kw_6, 6, 0);
+define_keywords_n($1, nasm_kw_7, 7, 0);
+define_keywords_n($1, nasm_kw_8, 8, 0);
+define_keywords_n($1, nasm_kw_9, 9, 0);
+
+% Create the "NASM" keymap if it does not exist yet and bind the
+% characters that trigger re-indentation, plus ESC ';' for comment
+% insertion.
+!if (keymap_p ($1)) make_keymap ($1);
+definekey("nasm_bol_self_ins", ";", $1);
+definekey("nasm_bol_self_ins", "#", $1);
+definekey("nasm_bol_self_ins", "[", $1);
+definekey("nasm_self_ins_ind", ":", $1);
+definekey("nasm_insert_comment", "^[;", $1);
+
+% nasm_mode: entry point named in the autoload line shown at the top
+% of this file.  Switches the current buffer to NASM mode: selects
+% the "NASM" keymap and syntax table, installs the indentation hooks
+% defined above, and finally runs the user's nasm_mode_hook.
+define nasm_mode {
+ set_mode("NASM", 4);
+ use_keymap ("NASM");
+ use_syntax_table ("NASM");
+ set_buffer_hook ("indent_hook", "nasm_indent_line");
+ set_buffer_hook ("newline_indent_hook", "nasm_newline_indent");
+ runhooks("nasm_mode_hook");
+}
diff --git a/names.c b/names.c
new file mode 100644
index 0000000..5b9ae3c
--- /dev/null
+++ b/names.c
@@ -0,0 +1,79 @@
+/* names.c included source file defining instruction and register
+ * names for the Netwide [Dis]Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+/*
+ * Register name table.  Entry 0 is an empty string; the remaining
+ * entries are in ascending alphabetical order.
+ * NOTE(review): the indices presumably correspond to a register
+ * enumeration declared in another file -- confirm before inserting
+ * or reordering entries.
+ */
+static char *reg_names[] = { /* register names, as strings */
+ "\0", "ah", "al", "ax", "bh", "bl", "bp", "bx", "ch", "cl",
+ "cr0", "cr2", "cr3", "cr4", "cs", "cx", "dh", "di", "dl", "dr0",
+ "dr1", "dr2", "dr3", "dr6", "dr7", "ds", "dx", "eax", "ebp",
+ "ebx", "ecx", "edi", "edx", "es", "esi", "esp", "fs", "gs",
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "si",
+ "sp", "ss", "st0", "st1", "st2", "st3", "st4", "st5", "st6",
+ "st7", "tr3", "tr4", "tr5", "tr6", "tr7"
+};
+
+/*
+ * Instruction mnemonic table, including the pseudo-instructions
+ * (db, dd, dq, dt, dw, equ, resb, ...).  Conditional instructions
+ * are not listed here; see icn[] and conditions[] below.
+ * NOTE(review): the list is in alphabetical order except that "int1"
+ * comes before "int01".  The order presumably has to match an opcode
+ * enumeration declared in another file -- confirm before "fixing" or
+ * extending it.
+ */
+static char *insn_names[] = { /* instruction names, as strings */
+ "aaa", "aad", "aam", "aas", "adc", "add", "and", "arpl",
+ "bound", "bsf", "bsr", "bswap", "bt", "btc", "btr", "bts",
+ "call", "cbw", "cdq", "clc", "cld", "cli", "clts", "cmc", "cmp",
+ "cmpsb", "cmpsd", "cmpsw", "cmpxchg", "cmpxchg8b", "cpuid",
+ "cwd", "cwde", "daa", "das", "db", "dd", "dec", "div", "dq",
+ "dt", "dw", "emms", "enter", "equ", "f2xm1", "fabs", "fadd",
+ "faddp", "fbld", "fbstp", "fchs", "fclex", "fcmovb", "fcmovbe",
+ "fcmove", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovnu", "fcmovu",
+ "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp",
+ "fdisi", "fdiv", "fdivp", "fdivr", "fdivrp", "feni", "ffree",
+ "fiadd", "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul",
+ "fincstp", "finit", "fist", "fistp", "fisub", "fisubr", "fld",
+ "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2",
+ "fldln2", "fldpi", "fldz", "fmul", "fmulp", "fnop", "fpatan",
+ "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave",
+ "fscale", "fsetpm", "fsin", "fsincos", "fsqrt", "fst", "fstcw",
+ "fstenv", "fstp", "fstsw", "fsub", "fsubp", "fsubr", "fsubrp",
+ "ftst", "fucom", "fucomi", "fucomip", "fucomp", "fucompp",
+ "fxam", "fxch", "fxtract", "fyl2x", "fyl2xp1", "hlt", "icebp",
+ "idiv", "imul", "in", "inc", "insb", "insd", "insw", "int",
+ "int1", "int01", "int3", "into", "invd", "invlpg", "iret",
+ "iretd", "iretw", "jcxz", "jecxz", "jmp", "lahf", "lar", "lds",
+ "lea", "leave", "les", "lfs", "lgdt", "lgs", "lidt", "lldt",
+ "lmsw", "loadall", "lodsb", "lodsd", "lodsw", "loop", "loope",
+ "loopne", "loopnz", "loopz", "lsl", "lss", "ltr", "mov", "movd",
+ "movq", "movsb", "movsd", "movsw", "movsx", "movzx", "mul",
+ "neg", "nop", "not", "or", "out", "outsb", "outsd", "outsw",
+ "packssdw", "packsswb", "packuswb", "paddb", "paddd", "paddsb",
+ "paddsw", "paddusb", "paddusw", "paddw", "pand", "pandn",
+ "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb", "pcmpgtd",
+ "pcmpgtw", "pmaddwd", "pmulhw", "pmullw", "pop", "popa",
+ "popad", "popaw", "popf", "popfd", "popfw", "por", "pslld",
+ "psllq", "psllw", "psrad", "psraw", "psrld", "psrlq", "psrlw",
+ "psubb", "psubd", "psubsb", "psubsw", "psubusb", "psubusw",
+ "psubw", "punpckhbw", "punpckhdq", "punpckhwd", "punpcklbw",
+ "punpckldq", "punpcklwd", "push", "pusha", "pushad", "pushaw",
+ "pushf", "pushfd", "pushfw", "pxor", "rcl", "rcr", "rdmsr",
+ "rdpmc", "rdtsc", "resb", "resd", "resq", "rest", "resw", "ret",
+ "retf", "retn", "rol", "ror", "rsm", "sahf", "sal", "salc",
+ "sar", "sbb", "scasb", "scasd", "scasw", "sgdt", "shl", "shld",
+ "shr", "shrd", "sidt", "sldt", "smsw", "stc", "std", "sti",
+ "stosb", "stosd", "stosw", "str", "sub", "test", "umov", "verr",
+ "verw", "wait", "wbinvd", "wrmsr", "xadd", "xchg", "xlatb",
+ "xor"
+};
+
+/*
+ * icn[] and ico[] are parallel arrays: icn[i] is the mnemonic stem
+ * of a conditional instruction family and ico[i] is the opcode
+ * constant for that family.  Keep the two in the same order.
+ */
+static char *icn[] = { /* conditional instructions */
+ "cmov", "j", "set"
+};
+
+static int ico[] = { /* and the corresponding opcodes */
+ I_CMOVcc, I_Jcc, I_SETcc
+};
+
+/*
+ * Condition code suffixes, in ascending alphabetical order.
+ * NOTE(review): presumably appended to the stems in icn[] to form
+ * full mnemonics (e.g. "j" + "ne"), and presumably indexed by a
+ * condition enumeration declared elsewhere -- confirm before
+ * reordering.
+ */
+static char *conditions[] = { /* condition code names */
+ "a", "ae", "b", "be", "c", "e", "g", "ge", "l", "le", "na", "nae",
+ "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", "np",
+ "ns", "nz", "o", "p", "pe", "po", "s", "z"
+};
diff --git a/nasm.c b/nasm.c
new file mode 100644
index 0000000..f4c75c4
--- /dev/null
+++ b/nasm.c
@@ -0,0 +1,648 @@
+/* The Netwide Assembler main program module
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "parser.h"
+#include "assemble.h"
+#include "labels.h"
+#include "outform.h"
+
+static void report_error (int, char *, ...);
+static void parse_cmdline (int, char **);
+static void assemble_file (char *);
+static int getkw (char *buf, char **value);
+static void register_output_formats(void);
+static void usage(void);
+
+/* Set to the current line buffer during pass two of assemble_file();
+ * nothing visible in this part of the file reads it. */
+static char *obuf;
+/* Input file name, output file name given with -o (or derived from
+ * the input name), and the scratch buffer the output format writes
+ * the derived name into. */
+static char inname[FILENAME_MAX];
+static char outname[FILENAME_MAX];
+static char realout[FILENAME_MAX];
+static int lineno; /* for error reporting */
+static int pass; /* 1 or 2; set by assemble_file() */
+/* Selected output format; register_output_formats() sets the
+ * default and `-f' overrides it. */
+static struct ofmt *ofmt = NULL;
+
+/* Output file; NULL until main() has opened it. */
+static FILE *ofile = NULL;
+/* Current [BITS] setting (16 or 32). */
+static int sb = 16; /* by default */
+
+/* Segment currently being assembled into; NO_SEG while an
+ * [ABSOLUTE] directive is in effect. */
+static long current_seg;
+/* Per-segment current offsets, indexed by segment number. */
+static struct RAA *offsets;
+/* Current offset while current_seg is NO_SEG. */
+static long abs_offset;
+/* NOTE(review): not referenced in the part of the file visible
+ * here. */
+#define OFFSET_DELTA 256
+
+/*
+ * get/set current offset...
+ * Both macros use abs_offset when current_seg is NO_SEG and the
+ * `offsets' array otherwise.
+ */
+#define get_curr_ofs (current_seg==NO_SEG?abs_offset:\
+ raa_read(offsets,current_seg))
+#define set_curr_ofs(x) (current_seg==NO_SEG?(void)(abs_offset=(x)):\
+ (void)(offsets=raa_write(offsets,current_seg,(x))))
+
+/* Set by report_error(): want_usage when the error carried
+ * ERR_USAGE, terminate_after_phase on any ERR_NONFATAL error. */
+static int want_usage;
+static int terminate_after_phase;
+
+/*
+ * Program entry point.
+ *
+ * Registers the output formats, parses the command line, opens the
+ * output file, runs the two-pass assembly and lets the output format
+ * write the result.  If any error was reported the output file is
+ * removed.
+ *
+ * Returns 0 on success and 1 if any error was reported, whether on
+ * the command line, while opening the output file, or during
+ * assembly.  (Previously errors reported during assembly still
+ * produced exit status 0.)
+ */
+int main(int argc, char **argv) {
+    want_usage = terminate_after_phase = FALSE;
+
+    nasm_set_malloc_error (report_error);
+    offsets = raa_init();
+
+    seg_init();
+
+    register_output_formats();
+
+    parse_cmdline(argc, argv);
+
+    if (terminate_after_phase) {
+        if (want_usage)
+            usage();
+        return 1;
+    }
+
+    /* No -o option: let the output format derive a name from the
+     * input file name. */
+    if (!*outname) {
+        ofmt->filename (inname, realout, report_error);
+        strcpy(outname, realout);
+    }
+
+    ofile = fopen(outname, "wb");
+    if (!ofile) {
+        /*
+         * Reported as non-fatal and followed by an explicit return:
+         * the fatal path in report_error() closes `ofile' and
+         * removes `outname', neither of which is appropriate when
+         * the file could not be opened in the first place.
+         */
+        report_error (ERR_NONFATAL | ERR_NOFILE,
+                      "unable to open output file `%s'", outname);
+        return 1;
+    }
+    ofmt->init (ofile, report_error, define_label);
+    assemble_file (inname);
+    if (!terminate_after_phase) {
+        ofmt->cleanup ();
+        cleanup_labels ();
+    }
+    fclose (ofile);
+    if (terminate_after_phase)
+        remove(outname);        /* don't leave a broken object file */
+
+    if (want_usage)
+        usage();
+
+    return terminate_after_phase ? 1 : 0;
+}
+
+/*
+ * Parse the command line into `inname', `outname' and `ofmt'.
+ *
+ *   -o file    output file name (value may be attached: -ofile)
+ *   -f format  output format (value may be attached: -fformat)
+ *   -h         print help and the list of output formats, then exit
+ *   -r         print the version, then exit
+ *
+ * Exactly one non-option argument, the input file, is expected.
+ * Problems are reported through report_error(); non-fatal ones set
+ * terminate_after_phase so that main() stops after parsing.
+ *
+ * File names that do not fit in the FILENAME_MAX-sized buffers are
+ * rejected with an error instead of being copied past the end of
+ * the buffer.
+ */
+static void parse_cmdline(int argc, char **argv) {
+    char *param;
+
+    *inname = *outname = '\0';
+    while (--argc > 0) {        /* `> 0' also copes with argc == 0 */
+        char *p = *++argv;
+        if (p[0]=='-') {
+            switch (p[1]) {
+              case 'o':         /* these parameters take values */
+              case 'f':
+                if (p[2])       /* the parameter's in the option */
+                    param = p+2;
+                else if (!argv[1]) {
+                    report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                                  "option `-%c' requires an argument",
+                                  p[1]);
+                    break;
+                } else
+                    --argc, param = *++argv;
+                if (p[1]=='o') {        /* output file */
+                    if (strlen(param) >= sizeof(outname))
+                        report_error (ERR_NONFATAL | ERR_NOFILE,
+                                      "output file name is too long");
+                    else
+                        strcpy (outname, param);
+                } else if (p[1]=='f') { /* output format */
+                    ofmt = ofmt_find(param);
+                    if (!ofmt) {
+                        report_error (ERR_FATAL | ERR_NOFILE | ERR_USAGE,
+                                      "unrecognised output format `%s'",
+                                      param);
+                    }
+                }
+                break;
+              case 'h':
+                fprintf(stderr,
+                        "usage: nasm [-o outfile] [-f format] filename\n");
+                fprintf(stderr,
+                        "    or nasm -r             for version info\n\n");
+                fprintf(stderr,
+                        "valid output formats for -f are"
+                        " (`*' denotes default):\n");
+                ofmt_list(ofmt);
+                exit (0);       /* never need usage message here */
+                break;
+              case 'r':
+                fprintf(stderr, "NASM version %s\n", NASM_VER);
+                exit (0);       /* never need usage message here */
+                break;
+              default:
+                report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                              "unrecognised option `-%c'",
+                              p[1]);
+                break;
+            }
+        } else {
+            if (*inname) {
+                report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                              "more than one input file specified");
+            } else if (strlen(p) >= sizeof(inname)) {
+                report_error (ERR_NONFATAL | ERR_NOFILE,
+                              "input file name is too long");
+            } else
+                strcpy(inname, p);
+        }
+    }
+    if (!*inname)
+        report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                      "no input file specified");
+}
+
+/* used by error function to report location */
+static char currentfile[FILENAME_MAX];
+
+/*
+ * Strip the newline from a line that fgets() stored in `buffer'
+ * (capacity `size').  Returns nonzero if the line is complete: it
+ * either ended in a newline, or it did not fill the buffer, which
+ * happens for a last line that has no trailing newline.  Returns
+ * zero if the line was cut short because it is too long.
+ */
+static int chomp_line (char *buffer, size_t size) {
+    size_t len = strlen(buffer);
+
+    if (len > 0 && buffer[len-1] == '\n') {
+        buffer[len-1] = '\0';
+        return 1;
+    }
+    return len < size - 1;
+}
+
+/*
+ * Define the label of an EQU instruction from its operand(s): either
+ * a single immediate, or a `seg:offset' pair of pure numbers.
+ * Anything else is reported as a syntax error.
+ */
+static void apply_equ (insn *ins) {
+    if (ins->operands == 1 &&
+        (ins->oprs[0].type & IMMEDIATE)) {
+        define_label (ins->label,
+                      ins->oprs[0].segment,
+                      ins->oprs[0].offset,
+                      ofmt, report_error);
+    } else if (ins->operands == 2 &&
+               (ins->oprs[0].type & IMMEDIATE) &&
+               (ins->oprs[0].type & COLON) &&
+               ins->oprs[0].segment == NO_SEG &&
+               (ins->oprs[1].type & IMMEDIATE) &&
+               ins->oprs[1].segment == NO_SEG) {
+        define_label (ins->label,
+                      ins->oprs[0].offset | SEG_ABS,
+                      ins->oprs[1].offset,
+                      ofmt, report_error);
+    } else
+        report_error(ERR_NONFATAL, "bad syntax for EQU");
+}
+
+/*
+ * Assemble the source file `fname' in two passes.
+ *
+ * Pass one handles directives, defines labels and uses insn_size()
+ * to track offsets.  If pass one reported any error the output file
+ * is removed and the program exits with status 1.  Pass two rereads
+ * the file from the start and generates output through assemble().
+ *
+ * Bracketed directives ([SEGMENT], [EXTERN], [BITS], [INC],
+ * [GLOBAL], [COMMON], [ABSOLUTE]) are recognised by getkw() and
+ * handled here; unknown ones are offered to the output format.
+ * Every other line goes to parse_line().
+ *
+ * Notes:
+ *  - Only one level of [INC] nesting is supported: `oldfile' is a
+ *    single slot, so an include inside an include overwrites it.
+ *  - A line longer than sizeof(buffer)-2 characters is reported and
+ *    skipped.  A final line without a trailing newline is accepted
+ *    as an ordinary line.
+ */
+static void assemble_file (char *fname) {
+    FILE *fp = fopen (fname, "r");
+    FILE *oldfile = NULL;     /* jrh - used when processing include files */
+    int oldfileline = 0;
+    char *value, *p, buffer[1024+2];   /* maximum line length defined here */
+    insn output_ins;
+    int i, seg, rn_error;
+
+    if (!fp) {                         /* couldn't open file */
+        report_error (ERR_FATAL | ERR_NOFILE,
+                      "unable to open input file `%s'", fname);
+        return;
+    }
+
+    init_labels ();
+    strcpy(currentfile,fname);
+
+    /* pass one */
+    pass = 1;
+    current_seg = ofmt->section(NULL, pass, &sb);
+    lineno = 0;
+    while (1) {
+        if (! fgets(buffer, sizeof(buffer), fp)) { /* EOF on current file */
+            if (oldfile) {
+                /* end of an include file: resume the including file */
+                fclose(fp);
+                fp = oldfile;
+                lineno = oldfileline;
+                strcpy(currentfile,fname);
+                oldfile = NULL;
+                continue;
+            }
+            else
+                break;
+        }
+        lineno++;
+        if (!chomp_line (buffer, sizeof(buffer))) {
+            /*
+             * We have a line that's too long. Throw an error, read
+             * to EOL, and ignore the line for assembly purposes.
+             */
+            report_error (ERR_NONFATAL, "line is longer than %d characters",
+                          (int)(sizeof(buffer)-2));
+            while (fgets(buffer, sizeof(buffer), fp) &&
+                   !chomp_line (buffer, sizeof(buffer)))
+                ;
+            continue;                  /* read another line */
+        }
+
+        /* here we parse our directives; this is not handled by the 'real'
+         * parser. */
+
+        if ( (i = getkw (buffer, &value)) ) {
+            switch (i) {
+              case 1:                  /* [SEGMENT n] */
+                seg = ofmt->section (value, pass, &sb);
+                if (seg == NO_SEG) {
+                    report_error (ERR_NONFATAL,
+                                  "segment name `%s' not recognised",
+                                  value);
+                } else {
+                    current_seg = seg;
+                }
+                break;
+              case 2:                  /* [EXTERN label] */
+                if (*value == '$')
+                    value++;           /* skip initial $ if present */
+                declare_as_global (value, report_error);
+                define_label (value, seg_alloc(), 0L, ofmt, report_error);
+                break;
+              case 3:                  /* [BITS bits] */
+                switch (atoi(value)) {
+                  case 16:
+                  case 32:
+                    sb = atoi(value);
+                    break;
+                  default:
+                    report_error(ERR_NONFATAL,
+                                 "`%s' is not a valid argument to [BITS]",
+                                 value);
+                    break;
+                }
+                break;
+              case 4:                  /* [INC file] */
+                oldfile = fp;
+                oldfileline = lineno;
+                lineno = 0;
+                strcpy(currentfile,value);
+                fp = fopen(value,"r");
+                if (!fp) {
+                    /* restore the location before reporting */
+                    lineno = oldfileline;
+                    fp = oldfile;
+                    strcpy(currentfile,fname);
+                    report_error (ERR_FATAL,
+                                  "unable to open include file `%s'\n",
+                                  value);
+                }
+                break;
+              case 5:                  /* [GLOBAL symbol] */
+                if (*value == '$')
+                    value++;           /* skip initial $ if present */
+                declare_as_global (value, report_error);
+                break;
+              case 6:                  /* [COMMON symbol size] */
+                p = value;
+                while (*p && !isspace(*p))
+                    p++;
+                if (*p) {
+                    long size;
+
+                    /* terminate the symbol name, skip to the size */
+                    while (*p && isspace(*p))
+                        *p++ = '\0';
+                    size = readnum (p, &rn_error);
+                    if (rn_error)
+                        report_error (ERR_NONFATAL, "invalid size specified"
+                                      " in COMMON declaration");
+                    else
+                        define_common (value, seg_alloc(), size,
+                                       ofmt, report_error);
+                } else
+                    report_error (ERR_NONFATAL, "no size specified in"
+                                  " COMMON declaration");
+                break;
+              case 7:                  /* [ABSOLUTE address] */
+                current_seg = NO_SEG;
+                abs_offset = readnum(value, &rn_error);
+                if (rn_error) {
+                    report_error (ERR_NONFATAL, "invalid address specified"
+                                  " for ABSOLUTE directive");
+                    abs_offset = 0x100;/* don't go near zero in case of / */
+                }
+                break;
+              default:
+                if (!ofmt->directive (buffer+1, value, 1))
+                    report_error (ERR_NONFATAL, "unrecognised directive [%s]",
+                                  buffer+1);
+                break;
+            }
+        } else {
+            long offs = get_curr_ofs;
+            parse_line (current_seg, offs, lookup_label,
+                        1, buffer, &output_ins, ofmt, report_error);
+            if (output_ins.opcode == I_EQU) {
+                /*
+                 * Special `..' EQUs get processed in pass two.
+                 */
+                if (!output_ins.label)
+                    report_error (ERR_NONFATAL,
+                                  "EQU not preceded by label");
+                else if (output_ins.label[0] != '.' ||
+                         output_ins.label[1] != '.')
+                    apply_equ (&output_ins);
+            } else {
+                if (output_ins.label)
+                    define_label (output_ins.label,
+                                  current_seg, offs,
+                                  ofmt, report_error);
+                offs += insn_size (current_seg, offs, sb,
+                                   &output_ins, report_error);
+                set_curr_ofs (offs);
+            }
+            cleanup_insn (&output_ins);
+        }
+    }
+
+    if (terminate_after_phase) {
+        fclose(ofile);
+        remove(outname);
+        if (want_usage)
+            usage();
+        exit (1);
+    }
+
+    /* pass two */
+    pass = 2;
+    rewind (fp);
+    current_seg = ofmt->section(NULL, pass, &sb);
+    raa_free (offsets);
+    offsets = raa_init();
+    lineno = 0;
+    while (1) {
+        if (!fgets(buffer, sizeof(buffer), fp)) {
+            if (oldfile) {
+                fclose(fp);
+                fp = oldfile;
+                lineno = oldfileline;
+                strcpy(currentfile,fname);
+                oldfile = NULL;
+                continue;
+            } else
+                break;
+        }
+        lineno++;
+        if (!chomp_line (buffer, sizeof(buffer)))
+            report_error (ERR_PANIC,
+                          "too-long line got through from pass one");
+
+        /* here we parse our directives; this is not handled by
+         * the 'real' parser. */
+
+        if ( (i = getkw (buffer, &value)) ) {
+            switch (i) {
+              case 1:                  /* [SEGMENT n] */
+                seg = ofmt->section (value, pass, &sb);
+                if (seg == NO_SEG) {
+                    report_error (ERR_PANIC,
+                                  "invalid segment name on pass two");
+                } else
+                    current_seg = seg;
+                break;
+              case 2:                  /* [EXTERN label] */
+                break;
+              case 3:                  /* [BITS bits] */
+                switch (atoi(value)) {
+                  case 16:
+                  case 32:
+                    sb = atoi(value);
+                    break;
+                  default:
+                    report_error(ERR_PANIC,
+                                 "invalid [BITS] value on pass two",
+                                 value);
+                    break;
+                }
+                break;
+              case 4:                  /* [INC file] */
+                oldfile = fp;
+                oldfileline = lineno;
+                lineno = 0;
+                strcpy(currentfile,value);
+                fp = fopen(value,"r");
+                if (!fp) {
+                    lineno = oldfileline;
+                    fp = oldfile;
+                    strcpy(currentfile,fname);
+                    /*
+                     * We don't report this error in the PANIC
+                     * class, even though we might expect to have
+                     * already picked it up during pass one,
+                     * because of the tiny chance that some other
+                     * process may have removed the include file
+                     * between the passes.
+                     */
+                    report_error (ERR_FATAL,
+                                  "unable to open include file `%s'\n",
+                                  value);
+                }
+                break;
+              case 5:                  /* [GLOBAL symbol] */
+                break;
+              case 6:                  /* [COMMON symbol size] */
+                break;
+              case 7:                  /* [ABSOLUTE addr] */
+                current_seg = NO_SEG;
+                abs_offset = readnum(value, &rn_error);
+                if (rn_error)
+                    report_error (ERR_PANIC, "invalid ABSOLUTE address "
+                                  "in pass two");
+                break;
+              default:
+                if (!ofmt->directive (buffer+1, value, 2))
+                    report_error (ERR_PANIC, "invalid directive on pass two");
+                break;
+            }
+        } else {
+            long offs = get_curr_ofs;
+            parse_line (current_seg, offs, lookup_label, 2,
+                        buffer, &output_ins, ofmt, report_error);
+            obuf = buffer;
+            if (output_ins.label)
+                define_label_stub (output_ins.label, report_error);
+            if (output_ins.opcode == I_EQU) {
+                /*
+                 * Special `..' EQUs get processed here.  (An EQU
+                 * without a label was already rejected in pass one;
+                 * the NULL check is purely defensive.)
+                 */
+                if (output_ins.label &&
+                    output_ins.label[0] == '.' &&
+                    output_ins.label[1] == '.')
+                    apply_equ (&output_ins);
+            }
+            offs += assemble (current_seg, offs, sb,
+                              &output_ins, ofmt, report_error);
+            cleanup_insn (&output_ins);
+            set_curr_ofs (offs);
+        }
+    }
+
+    fclose (fp);                       /* done with the source file */
+}
+
+/*
+ * Recognise a bracketed directive line of the form
+ *
+ *     [keyword value]   ; optional comment
+ *
+ * `buf' is modified in place: the keyword is lower-cased and
+ * NUL-terminated (it starts at buf+1), and the closing bracket is
+ * overwritten with a NUL.  `*value' is set to the text between the
+ * keyword and the closing bracket, or to an empty string if there is
+ * none.  Whitespace inside the value is not trimmed.
+ *
+ * Returns:
+ *    0  not a directive line (no leading '[', no ']', or something
+ *       other than whitespace/comment after the ']'); `buf' and
+ *       `*value' are left untouched in this case
+ *    1  segment / section
+ *    2  extern
+ *    3  bits
+ *    4  inc / include
+ *    5  global
+ *    6  common
+ *    7  absolute
+ *   -1  any other keyword
+ *
+ * Characters are converted to unsigned char before being handed to
+ * the <ctype.h> functions, whose behaviour is undefined for negative
+ * values other than EOF.
+ */
+static int getkw (char *buf, char **value) {
+    char *p, *q;
+
+    if (*buf!='[')
+        return 0;
+    p = buf;
+    while (*p && *p != ']') p++;
+    if (!*p)
+        return 0;
+    q = p++;
+    /* only whitespace or a comment may follow the closing bracket */
+    while (*p && *p != ';') {
+        if (!isspace((unsigned char)*p))
+            return 0;
+        p++;
+    }
+    q[1] = '\0';                /* cut the line off after the ']' */
+
+    p = buf+1;                  /* start of the keyword */
+    while (*buf && *buf!=' ' && *buf!=']' && *buf!='\t')
+        buf++;
+    if (*buf==']') {
+        /* no value: point at the terminator, i.e. an empty string */
+        *buf = '\0';
+        *value = buf;
+    } else {
+        *buf++ = '\0';
+        *value = buf;
+        while (*buf!=']') buf++;
+        *buf++ = '\0';
+    }
+    for (q=p; *q; q++)
+        *q = (char) tolower((unsigned char)*q);
+    if (!strcmp(p, "segment") || !strcmp(p, "section"))
+        return 1;
+    if (!strcmp(p, "extern"))
+        return 2;
+    if (!strcmp(p, "bits"))
+        return 3;
+    if (!strcmp(p, "inc") || !strcmp(p, "include"))
+        return 4;
+    if (!strcmp(p, "global"))
+        return 5;
+    if (!strcmp(p, "common"))
+        return 6;
+    if (!strcmp(p, "absolute"))
+        return 7;
+    return -1;
+}
+
+/*
+ * Error reporting routine handed to every other module.
+ *
+ * `severity' is one of ERR_WARNING, ERR_NONFATAL, ERR_FATAL or
+ * ERR_PANIC (selected with ERR_MASK), optionally OR'ed with:
+ *   ERR_NOFILE  prefix the message with "nasm: " instead of the
+ *               current file name and line number
+ *   ERR_USAGE   make the caller print the usage hint afterwards
+ * `fmt' and the following arguments are as for printf; a newline is
+ * appended.
+ *
+ * ERR_WARNING  just prints the message.
+ * ERR_NONFATAL sets terminate_after_phase so that the program stops
+ *              at the end of the current phase.
+ * ERR_FATAL    closes and removes the output file if it has been
+ *              opened, then exits with status 1.
+ * ERR_PANIC    aborts.
+ *
+ * The output file is only closed and removed when `ofile' is
+ * non-NULL.  Fatal errors can occur before it is opened (for example
+ * an unknown `-f' format); in that case there is nothing to close,
+ * and a file named with `-o' that this run did not create must not
+ * be deleted.
+ */
+static void report_error (int severity, char *fmt, ...) {
+    va_list ap;
+
+    if (severity & ERR_NOFILE)
+        fputs ("nasm: ", stderr);
+    else
+        fprintf (stderr, "%s:%d: ", currentfile, lineno);
+
+    if ( (severity & ERR_MASK) == ERR_WARNING)
+        fputs ("warning: ", stderr);
+    else if ( (severity & ERR_MASK) == ERR_PANIC)
+        fputs ("panic: ", stderr);
+
+    va_start (ap, fmt);
+    vfprintf (stderr, fmt, ap);
+    va_end (ap);                /* required after every va_start */
+    fputc ('\n', stderr);
+
+    if (severity & ERR_USAGE)
+        want_usage = TRUE;
+
+    switch (severity & ERR_MASK) {
+      case ERR_WARNING:
+        /* no further action, by definition */
+        break;
+      case ERR_NONFATAL:
+        terminate_after_phase = TRUE;
+        break;
+      case ERR_FATAL:
+        if (ofile) {
+            fclose(ofile);
+            remove(outname);
+        }
+        if (want_usage)
+            usage();
+        exit(1);                /* instantly die */
+        break;                  /* placate silly compilers */
+      case ERR_PANIC:
+        abort();                /* panic and dump core */
+        break;
+    }
+}
+
+/* Print the one-line hint that points the user at `nasm -h'. */
+static void usage(void) {
+    static const char hint[] = "type `nasm -h' for help\n";
+
+    fputs(hint, stderr);
+}
+
+/*
+ * Register every output format that was selected at compile time
+ * (one OF_xxx macro per format) and make OF_DEFAULT the initial
+ * value of `ofmt'.
+ *
+ * The extern declarations are kept together at function scope rather
+ * than next to each ofmt_register() call.
+ * NOTE(review): OF_DEFAULT presumably expands to one of these of_xxx
+ * names, so the declarations must still be in scope at the final
+ * assignment -- confirm against outform.h before restructuring.
+ */
+static void register_output_formats(void) {
+ /* Flat-form binary format */
+#ifdef OF_BIN
+ extern struct ofmt of_bin;
+#endif
+ /* Unix formats: a.out, COFF, ELF */
+#ifdef OF_AOUT
+ extern struct ofmt of_aout;
+#endif
+#ifdef OF_COFF
+ extern struct ofmt of_coff;
+#endif
+#ifdef OF_ELF
+ extern struct ofmt of_elf;
+#endif
+ /* Linux strange format: as86 */
+#ifdef OF_AS86
+ extern struct ofmt of_as86;
+#endif
+ /* DOS formats: OBJ, Win32 */
+#ifdef OF_OBJ
+ extern struct ofmt of_obj;
+#endif
+#ifdef OF_WIN32
+ extern struct ofmt of_win32;
+#endif
+#ifdef OF_RDF
+ extern struct ofmt of_rdf;
+#endif
+#ifdef OF_DBG /* debug format must be included specifically */
+ extern struct ofmt of_dbg;
+#endif
+
+ /* Registration, in the same order as the declarations above. */
+#ifdef OF_BIN
+ ofmt_register (&of_bin);
+#endif
+#ifdef OF_AOUT
+ ofmt_register (&of_aout);
+#endif
+#ifdef OF_COFF
+ ofmt_register (&of_coff);
+#endif
+#ifdef OF_ELF
+ ofmt_register (&of_elf);
+#endif
+#ifdef OF_AS86
+ ofmt_register (&of_as86);
+#endif
+#ifdef OF_OBJ
+ ofmt_register (&of_obj);
+#endif
+#ifdef OF_WIN32
+ ofmt_register (&of_win32);
+#endif
+#ifdef OF_RDF
+ ofmt_register (&of_rdf);
+#endif
+#ifdef OF_DBG
+ ofmt_register (&of_dbg);
+#endif
+ /*
+ * set the default format
+ */
+ ofmt = &OF_DEFAULT;
+}
diff --git a/nasm.doc b/nasm.doc
new file mode 100644
index 0000000..dd2073b
--- /dev/null
+++ b/nasm.doc
@@ -0,0 +1,996 @@
+ The Netwide Assembler, NASM
+ ===========================
+
+Introduction
+============
+
+The Netwide Assembler grew out of an idea on comp.lang.asm.x86 (or
+possibly alt.lang.asm, I forget which), which was essentially that
+there didn't seem to be a good free x86-series assembler around, and
+that maybe someone ought to write one.
+
+- A86 is good, but not free, and in particular you don't get any
+ 32-bit capability until you pay. It's DOS only, too.
+
+- GAS is free, and runs on both DOS and Unix, but it's not very good,
+ since it's designed to be a back end to gcc, which always feeds it
+ correct code. So its error checking is minimal. Also its syntax is
+ horrible, from the point of view of anyone trying to actually
+ _write_ anything in it. Plus you can't write 16-bit code in it.
+
+- AS86 is Linux specific, and (my version at least) doesn't seem to
+ have much (or any) documentation.
+
+- MASM isn't very good. And it's expensive. And it runs only under
+ DOS.
+
+- TASM is better, but still strives for MASM compatibility, which
+ means millions of directives and tons of red tape. And its syntax
+ is essentially MASM's, with the contradictions and quirks that
+ entails (although it sorts out some of those by means of Ideal
+ mode). It's expensive too. And it's DOS only.
+
+So here, for your coding pleasure, is NASM. At present it's still in
+prototype stage - we don't promise that it can outperform any of
+these assemblers. But please, _please_ send us bug reports and fixes
+and anything else you can get your hands on, and we'll improve it
+out of all recognition. Again.
+
+Please see the file `Licence' for the legalese.
+
+Getting Started: Installation
+=============================
+
+NASM is distributed in source form, in what we hope is totally
+ANSI-compliant C. It uses no non-portable code at all, that we know
+of. It ought to compile without change on any system you care to try
+it on. We also supply a pre-compiled 16-bit DOS binary.
+
+To install it, edit the Makefile to describe your C compiler, and
+type `make'. Then copy the binary to somewhere on your path. That's
+all - NASM relies on no files other than its own executable.
+Although if you're on a Unix system, you may also want to install
+the NASM manpage (`nasm.1'). You may also want to install the binary
+and manpage for the Netwide Disassembler, NDISASM (also see
+`ndisasm.doc').
+
+Running NASM
+============
+
+To assemble a file, you issue a command of the form
+
+ nasm -f <format> <filename> [-o <output>]
+
+For example,
+
+ nasm -f elf myfile.asm
+
+will assemble `myfile.asm' into an ELF object file `myfile.o'. And
+
+ nasm -f bin myfile.asm -o myfile.com
+
+will assemble `myfile.asm' into a raw binary program `myfile.com'.
+
+To get usage instructions from NASM, try typing `nasm -h'. This will
+also list the available output file formats, and what they are.
+
+If you use Linux but aren't sure whether your system is a.out or
+ELF, type `file /usr/bin/nasm' or wherever you put the NASM binary.
+If it says something like
+
+/usr/bin/nasm: ELF 32-bit LSB executable i386 (386 and up) Version 1
+
+then your system is ELF, and you should use `-f elf' when you want
+NASM to produce Linux object files. If it says
+
+/usr/bin/nasm: Linux/i386 demand-paged executable (QMAGIC)
+
+or something similar, your system is a.out, and you should use `-f
+aout' instead.
+
+Like Unix compilers and assemblers, NASM is silent unless it goes
+wrong: you won't see any output at all, unless it gives error
+messages.
+
+Writing Programs with NASM
+==========================
+
+Each line of a NASM source file should contain some combination of
+the four fields
+
+LABEL: INSTRUCTION OPERANDS ; COMMENT
+
+`LABEL' defines a label pointing to that point in the source. There
+are no restrictions on white space: labels may have white space
+before them, or not, as you please. The colon after the label is
+also optional.
+
+Valid characters in labels are letters, numbers, `_', `$', `#', `@',
+`~', `?', and `.'. The only characters which may be used as the
+_first_ character of an identifier are letters, `_' and `?', and
+(with special meaning: see `Local Labels') `.'. An identifier may
+also be prefixed with a $ sign to indicate that it is intended to be
+read as an identifier and not a reserved word; thus, if some other
+module you are linking with defines a symbol `eax', you can refer to
+`$eax' in NASM code to distinguish it from the register name.
+
+`INSTRUCTION' can be any machine opcode (Pentium and P6 opcodes, FPU
+opcodes, MMX opcodes and even undocumented opcodes are all
+supported). The instruction may be prefixed by LOCK, REP, REPE/REPZ
+or REPNE/REPNZ, in the usual way. Explicit address-size and operand-
+size prefixes A16, A32, O16 and O32 are provided - one example of
+their use is given in the `Unusual Instruction Sizes' section below.
+You can also use a segment register as a prefix: coding `es mov
+[bx],ax' is equivalent to coding `mov [es:bx],ax'. We recommend the
+latter syntax, since it is consistent with other syntactic features
+of the language, but for instructions such as `lodsb' there isn't
+anywhere to put a segment override except as a prefix. This is why
+we support it.
+
+The `INSTRUCTION' field may also contain some pseudo-opcodes: see
+the section on pseudo-opcodes for details.
+
+`OPERANDS' can be nonexistent, or huge, depending on the
+instruction, of course. When operands are registers, they are given
+simply as register names: `eax', `ss', `di' for example. NASM does
+_not_ use the GAS syntax, in which register names are prefixed by a
+`%' sign. Operands may also be effective addresses, or they may be
+constants or expressions. See the separate sections on these for
+details.
+
+`COMMENT' is anything after the first semicolon on the line,
+excluding semicolons inside quoted strings.
+
+Of course, all these fields are optional: the presence or absence of
+the OPERANDS field is required by the nature of the INSTRUCTION
+field, but any line may contain a LABEL or not, may contain an
+INSTRUCTION or not, and may contain a COMMENT or not, independently
+of each other.
+
+Lines may also contain nothing but a directive: see `Assembler
+Directives' below for details.
+
+NASM cannot currently handle any line longer than 1024 characters.
+This may be fixed in a future release.
+
+Floating Point Instructions
+===========================
+
+NASM has support for assembling FPU opcodes. However, its syntax is
+not necessarily the same as anyone else's.
+
+NASM uses the notation `st0', `st1', etc. to denote the FPU stack
+registers. NASM also accepts a wide range of single-operand and
+two-operand forms of the instructions. For people who wish to use
+the single-operand form exclusively (this is in fact the `canonical'
+form from NASM's point of view, in that it is the form produced by
+the Netwide Disassembler), there is a TO keyword which makes
+available the opcodes which cannot be so easily accessed by one
+operand. Hence:
+
+ fadd st1 ; this sets st0 := st0 + st1
+ fadd st0,st1 ; so does this
+ fadd st1,st0 ; this sets st1 := st1 + st0
+ fadd to st1 ; so does this
+
+It's also worth noting that the FPU instructions that reference
+memory must use the prefixes DWORD, QWORD or TWORD to indicate what
+size of memory operand they refer to.
+
+NASM, in keeping with our policy of not trying to second-guess the
+programmer, will _never_ automatically insert WAIT instructions into
+your code stream. You must code WAIT yourself before _any_
+instruction that needs it. (Of course, on 286 processors or above,
+it isn't needed anyway...)
+
+NASM supports specification of floating point constants by means of
+`dd' (single precision), `dq' (double precision) and `dt' (extended
+precision). Floating-point _arithmetic_ is not done, due to
+portability constraints (not all platforms on which NASM can be run
+support the same floating point types), but simple constants can be
+specified. For example:
+
+gamma dq 0.5772156649 ; Euler's constant
+
+Pseudo-Opcodes
+==============
+
+Pseudo-opcodes are not real x86 machine opcodes, but are used in the
+instruction field anyway because that's the most convenient place to
+put them. The current pseudo-opcodes are DB, DW and DD (plus DQ and DT
+for floating-point constants), their uninitialised counterparts RESB,
+RESW and RESD, the EQU command, and the TIMES prefix.
+
+DB, DW and DD work as you would expect: they can each take an
+arbitrary number of operands, and when assembled, they generate
+nothing but those operands. All three of them can take string
+constants as operands, which no other instruction can currently do.
+See the `Constants' section for details about string constants.
+
+RESB, RESW and RESD are designed to be used in the BSS section of a
+module: they declare _uninitialised_ storage space. Each takes a
+single operand, which is the number of bytes, words or doublewords
+to reserve. We do not support the MASM/TASM syntax of reserving
+uninitialised space by writing `DW ?' or similar: this is what we do
+instead. (But see `Critical Expressions' for a caveat on the nature
+of the operand.)
+
+(An aside: if you want to be able to write `DW ?' and have something
+vaguely useful happen, you can always code `? EQU 0'...)
+
+EQU defines a symbol to a specified value: when EQU is used, the
+LABEL field must be present. The action of EQU is to define the
+given label name to the value of its (only) operand. This definition
+is absolute, and cannot change later. So, for example,
+
+message db 'hello, world'
+msglen equ $-message
+
+defines `msglen' to be the constant 12. `msglen' may not then be
+redefined later. This is not a preprocessor definition either: the
+value of `msglen' is evaluated _once_, using the value of `$' (see
+the section `Expressions' for details of `$') at the point of
+definition, rather than being evaluated wherever it is referenced
+and using the value of `$' at the point of reference. Note that the
+caveat in `Critical Expressions' applies to EQU too, at the moment.
+
+Finally, the TIMES prefix causes the instruction to be assembled
+multiple times. This is partly NASM's equivalent of the DUP syntax
+supported by MASM-compatible assemblers, in that one can do
+
+zerobuf: times 64 db 0
+
+or similar, but TIMES is more versatile than that. TIMES takes not
+just a numeric constant, but a numeric _expression_, so one can do
+things like
+
+buffer: db 'hello, world'
+ times 64-$+buffer db ' '
+
+which will store exactly enough spaces to make the total length of
+`buffer' up to 64. (See the section `Critical Expressions' for a
+caveat on the use of TIMES.) Finally, TIMES can be applied to
+ordinary opcodes, so you can code trivial unrolled loops in it:
+
+ times 100 movsb
+
+Note that there is no effective difference between `times 100 resb
+1' and `resb 100'.
+
+Effective Addresses
+===================
+
+NASM's addressing scheme is very simple, although it can involve
+more typing than other assemblers. Where other assemblers
+distinguish between a _variable_ (label declared without a colon)
+and a _label_ (declared with a colon), and use different means of
+addressing the two, NASM is totally consistent.
+
+To refer to the contents of a memory location, square brackets are
+required. This applies to simple variables, computed offsets,
+segment overrides, effective addresses - _everything_. E.g.:
+
+wordvar dw 123
+ mov ax,[wordvar]
+ mov ax,[wordvar+1]
+ mov ax,[es:wordvar+bx]
+
+NASM does _not_ support the various strange syntaxes used by MASM
+and others, such as
+
+ mov ax,wordvar ; this is legal, but means something else
+ mov ax,es:wordvar[bx] ; not even slightly legal
+ es mov ax,wordvar[1] ; the prefix is OK, but not the rest
+
+If no square brackets are used, NASM interprets label references to
+mean the address of the label. Hence there is no need for MASM's
+OFFSET keyword, but
+
+ mov ax,wordvar
+
+loads AX with the _address_ of the variable `wordvar'.
+
+More complicated effective addresses are handled by enclosing them
+within square brackets as before:
+
+ mov eax,[ebp+2*edi+offset]
+ mov ax,[bx+di+8]
+
+NASM will cope with some fairly strange effective addresses, if you
+try it: provided your effective address expression evaluates
+_algebraically_ to something that the instruction set supports, it
+will be able to assemble it. For example,
+
+ mov eax,[ebx*5] ; actually assembles to [ebx+ebx*4]
+ mov ax,[bx-si+2*si] ; actually assembles to [bx+si]
+
+will both work.
+
+There is an ambiguity in the instruction set, which allows two forms
+of 32-bit effective address with equivalent meaning:
+
+ mov eax,[2*eax+0]
+ mov eax,[eax+eax]
+
+These two expressions clearly refer to the same address. The
+difference is that the first one, if assembled `as is', requires a
+four-byte offset to be stored as part of the instruction, so it
+takes up more space. NASM will generate the second (smaller) form
+for both of the above instructions, in an effort to save space.
+There is not, currently, any means for forcing NASM to generate the
+larger form of the instruction.
+
+Mixing 16 and 32 Bit Code: Unusual Instruction Sizes
+====================================================
+
+A number of assemblers seem to have trouble assembling instructions
+that use a different operand or address size from the one they are
+expecting; as86 is a good example, even though the Linux kernel boot
+process (which is assembled using as86) needs several such
+instructions and as86 can't do them.
+
+Instructions such as `mov eax,2' in 16-bit mode are easy, of course,
+and NASM can do them just as well as any other assembler. The
+difficult instructions are things like far jumps.
+
+Suppose you are in a 16-bit segment, in protected mode, and you want
+to execute a far jump to a point in a 32-bit segment. You need to
+code a 32-bit far jump in a 16-bit segment; not many assemblers I
+know of will easily support this. NASM can, by means of the `word'
+and `dword' specifiers. So you can code
+
+ call 1234h:5678h ; this uses the default segment size
+ call word 1234h:5678h ; this is guaranteed to be 16-bit
+ call dword 1234h:56789ABCh ; and this is guaranteed 32-bit
+
+and NASM will generate correct code for them.
+
+Similarly, if you are coding in a 16-bit code segment, but trying to
+access memory in a 32-bit data segment, your effective addresses
+will want to be 32-bit. Of course as soon as you specify an
+effective address containing a 32-bit register, like `[eax]', the
+addressing is forced to be 32-bit anyway. But if you try to specify
+a simple offset, such as `[label]' or `[0x10000]', you will get the
+default address size, which in this case will be wrong. However,
+NASM allows you to code `[dword 0x10000]' to force a 32-bit address
+size, or conversely `[word wlabel]' to force 16 bits.
+
+Be careful not to confuse `word' and `dword' _inside_ the square
+brackets with _outside_: consider the instruction
+
+ mov word [dword 0x123456],0x7890
+
+which moves 16 bits of data to an address specified by a 32-bit
+offset. There is no contradiction between the `word' and `dword' in
+this instruction, since they modify different aspects of the
+functionality. Or, even more confusingly,
+
+ call dword far [fs:word 0x4321]
+
+which takes an address specified by a 16-bit offset, and extracts a
+48-bit DWORD FAR pointer from it to call.
+
+Using this effective-address syntax, the `dword' or `word' override
+may come before or after the segment override if any: NASM isn't
+fussy. Hence:
+
+ mov ax,[fs:dword 0x123456]
+ mov ax,[dword fs:0x123456]
+
+are equivalent forms, and generate the same code.
+
+The LOOP instruction comes in strange sizes, too: in a 16-bit
+segment it uses CX as its count register by default, and in a 32-bit
+segment it uses ECX. But it's possible to do either one in the other
+segment, and NASM will cope by letting you specify the count
+register as a second operand:
+
+ loop label ; uses CX or ECX depending on mode
+ loop label,cx ; always uses CX
+ loop label,ecx ; always uses ECX
+
+Finally, the string instructions LODSB, STOSB, MOVSB, CMPSB, SCASB,
+INSB, and OUTSB can all have strange address sizes: typically, in a
+16-bit segment they read from [DS:SI] and write to [ES:DI], and in a
+32-bit segment they read from [DS:ESI] and write to [ES:EDI].
+However, this can be changed by the use of the explicit address-size
+prefixes `a16' and `a32'. These prefixes generate null code if used
+in the same size segment as they specify, but generate an 0x67
+prefix otherwise. Hence `a16' generates no code in a 16-bit segment,
+but 0x67 in a 32-bit one, and vice versa. So `a16 lodsb' will always
+generate code to read a byte from [DS:SI], no matter what the size
+of the segment. There are also explicit operand-size override
+prefixes, `o16' and `o32', which will optionally generate 0x66
+bytes, but these are provided for completeness and should never have
+to be used.
+
+Constants
+=========
+
+NASM can accept three kinds of constant: _numeric_, _character_ and
+_string_ constants.
+
+Numeric constants are simply numbers. NASM supports a variety of
+syntaxes for expressing numbers in strange bases: you can do any of
+
+ 100 ; this is decimal
+ 0x100 ; hex
+ 100h ; hex as well
+ $100 ; hex again
+ 100q ; octal
+ 100b ; binary
+
+NASM does not support A86's syntax of treating anything with a
+leading zero as hex, nor does it support the C syntax of treating
+anything with a leading zero as octal. Leading zeros make no
+difference to NASM. (Except that, as usual, if you have a hex
+constant beginning with a letter, and you want to use the trailing-H
+syntax to represent it, you have to use a leading zero so that NASM
+will recognise it as a number instead of a label.)
+
+The `x' in `0x100', and the trailing `h', `q' and `b', may all be
+upper case if you want.
+
+Character constants consist of up to four characters enclosed in
+single or double quotes. No escape character is defined for
+including the quote character itself: if you want to declare a
+character constant containing a double quote, enclose it in single
+quotes, and vice versa.
+
+Character constants' values are worked out in terms of a
+little-endian computer: if you code
+
+ mov eax,'abcd'
+
+then if you were to examine the binary output from NASM, it would
+contain the visible string `abcd', which of course means that the
+actual value loaded into EAX would be 0x64636261, not 0x61626364.
+
+String constants are like character constants, only more so: if a
+character constant appearing as operand to a DB, DW or DD is longer
+than the word size involved (1, 2 or 4 respectively), it will be
+treated as a string constant instead, which is to say the
+concatenation of separate character constants.
+
+For example,
+
+ db 'hello, world'
+
+declares a twelve-character string constant. And
+
+ dd 'dontpanic'
+
+(a string constant) is equivalent to writing
+
+ dd 'dont','pani','c'
+
+(three character constants), so that what actually gets assembled is
+equivalent to
+
+ db 'dontpanic',0,0,0
+
+(It's worth noting that one of the reasons for the reversal of
+character constants is so that the instruction `dw "ab"' has the
+same meaning whether "ab" is treated as a character constant or a
+string constant. Hence there is less confusion.)
+
+Expressions
+===========
+
+Expressions in NASM can be formed of the following operators: `|'
+(bitwise OR), `^' (bitwise XOR), `&' (bitwise AND), `<<' and `>>'
+(logical bit shifts), `+', `-', `*' (ordinary addition, subtraction
+and multiplication), `/', `%' (unsigned division and modulo), `//',
+`%%' (signed division and modulo), `~' (bitwise NOT), and the
+operators SEG and WRT (see `SEG and WRT' below).
+
+The order of precedence is:
+
+| lowest
+^
+&
+<< >>
+binary + and -
+* / % // %%
+unary + and -, ~, SEG highest
+
+As usual, operators within a precedence level associate to the left
+(i.e. `2-3-4' evaluates the same way as `(2-3)-4').
+
+A form of algebra is done by NASM when evaluating expressions: I
+have already stated that an effective address expression such as
+`[EAX*6-EAX]' will be recognised by NASM as algebraically equivalent
+to `[EAX*4+EAX]', and assembled as such. In addition, algebra can be
+done on labels as well: `label2*2-label1' is an acceptable way to
+define an address as far beyond `label2' as `label1' is before it.
+(In less algebraically capable assemblers, one might have to write
+that as `label2 + (label2-label1)', where the value of every
+sub-expression is either a valid address or a constant. NASM can of
+course cope with that version as well.)
+
+Expressions may also contain the special token `$', known as a Here
+token, which always evaluates to the address of the current assembly
+point. (That is, the address of the assembly point _before_ the
+current instruction gets assembled.) The special token `$$'
+evaluates to the address of the beginning of the current section;
+this can be used for alignment, as shown below:
+
+ times ($$-$) & 3 nop ; pad with NOPs to 4-byte boundary
+
+SEG and WRT
+===========
+
+NASM contains the capability for its object file formats (currently,
+only `obj' makes use of this) to permit programs to directly refer
+to the segment-base values of their segments. This is achieved
+either by the object format defining the segment names as symbols
+(`obj' does this), or by the use of the SEG operator.
+
+SEG is a unary prefix operator which, when applied to a symbol
+defined in a segment, will yield the segment base value of that
+segment. (In `obj' format, symbols defined in segments which are
+grouped are considered to be primarily members of the _group_, not
+the segment, and the return value of SEG reflects this.)
+
+SEG may be used for far pointers: it is guaranteed that for any
+symbol `sym', using the offset `sym' from the segment base `SEG sym'
+yields a correct pointer to the symbol. Hence you can code a far
+call by means of
+
+ CALL SEG routine:routine
+
+or store a far pointer in a data segment by
+
+ DW routine, SEG routine
+
+For convenience, NASM supports the forms
+
+ CALL FAR routine
+ JMP FAR routine
+
+as direct synonyms for the canonical syntax
+
+ CALL SEG routine:routine
+ JMP SEG routine:routine
+
+No alternative syntax for
+
+ DW routine, SEG routine
+
+is supported.
+
+Simply referring to `sym', for some symbol, will return the offset
+of `sym' from its _preferred_ segment base (as returned from `SEG
+sym'); sometimes, you may want to obtain the offset of `sym' from
+some _other_ segment base. (E.g. the offset of `sym' from the base
+of the segment it's in, where normally you'd get the offset from a
+group base). This is accomplished using the WRT (With Reference To)
+keyword: if `sym' is defined in segment `seg' but you want its
+offset relative to the beginning of segment `seg2', you can do
+
+ mov ax,sym WRT seg2
+
+The right-hand operand to WRT must be a segment-base value. You can
+also do `sym WRT SEG sym2' if you need to.
+
+Critical Expressions
+====================
+
+NASM is a two-pass assembler: it goes over the input once to
+determine the location of all the symbols, then once more to
+actually generate the output code. Most expressions are
+non-critical, in that if they contain a forward reference and hence
+their correct value is unknown during the first pass, it doesn't
+matter. However, arguments to RESB, RESW and RESD, and the argument
+to the TIMES prefix, can actually affect the _size_ of the generated
+code, and so it is critical that the expression can be evaluated
+correctly on the first pass. So in these situations, expressions may
+not contain forward references. This prevents NASM from having to
+sort out a mess such as
+
+ times (label-$) db 0
+label: db 'where am I?'
+
+in which the TIMES argument could equally legally evaluate to
+_anything_, or perhaps even worse,
+
+ times (label-$+1) db 0
+label: db 'NOW where am I?'
+
+in which any value for the TIMES argument is by definition invalid.
+
+Since NASM is a two-pass assembler, this criticality condition also
+applies to the argument to EQU. Suppose, if this were not the case,
+we were to have the setup
+
+ mov ax,a
+a equ b
+b:
+
+On pass one, `a' cannot be defined properly, since `b' is not known
+yet. On pass two, `b' is known, so line two can define `a' properly.
+Unfortunately, line 1 needed `a' to be defined properly, so this
+code will not assemble using only two passes.
+
+Local Labels
+============
+
+NASM takes its local label scheme mainly from the old Amiga
+assembler Devpac: a local label is one that begins with a period.
+The `localness' comes from the fact that local labels are associated
+with the previous non-local label, so that you may declare the same
+local label twice if a non-local one intervenes. Hence:
+
+label1 ; some code
+.loop ; some more code
+ jne .loop
+ ret
+label2 ; some code
+.loop ; some more code
+ jne .loop
+ ret
+
+In the above code, each `jne' instruction jumps to the line of code
+before it, since the `.loop' labels are distinct from each other.
+
+NASM, however, introduces an extra capability not present in Devpac,
+which is that the local labels are actually _defined_ in terms of
+their associated non-local label. So if you really have to, you can
+write
+
+label3 ; some more code
+ ; and some more
+ jmp label1.loop
+
+So although local labels are _usually_ local, it is possible to
+reference them from anywhere in your program, if you really have to.
+
+Assembler Directives
+====================
+
+Assembler directives appear on a line by themselves (apart from a
+comment), and must be enclosed in square brackets. No white space
+may appear before the opening square bracket, although white space
+and a comment may come after the closing bracket.
+
+Some directives are universal: they may be used in any situation,
+and do not change their syntax. The universal directives are listed
+below.
+
+[BITS 16] or [BITS 32] switches NASM into 16-bit or 32-bit mode.
+(This is equivalent to USE16 and USE32 segments, in TASM or MASM.)
+In 32-bit mode, instructions are prefixed with 0x66 or 0x67 prefixes
+when they use 16-bit data or addresses; in 16-bit mode, the reverse
+happens. NASM's default depends on the object format; the defaults
+are documented with the formats. (See `obj', in particular, for some
+unusual behaviour.)
+
+[INCLUDE filename] or [INC filename] includes another source file
+into the current one. At present, only one level of inclusion is
+supported.
+
+[SECTION name] or [SEGMENT name] changes which section the code you
+write will be assembled into. Acceptable section names vary between
+output formats, but most formats (indeed, all formats at the moment)
+support the names `.text', `.data' and `.bss'. Note that `.bss' is
+an uninitialised data section, and so you will receive a warning
+from NASM if you try to assemble any code or data in it. The only
+thing you can do in `.bss' without triggering a warning is use RESB,
+RESW and RESD. That's what they're for.
+
+[ABSOLUTE address] can be considered a different form of [SECTION],
+in that it must be overridden using a SECTION directive once you
+have finished using it. It is used to assemble notional code at an
+absolute offset address; of course, you can't actually assemble
+_code_ there, since no object file format is capable of putting the
+code in place, but you can use RESB, RESW and RESD, and you can
+define labels. Hence you could, for example, define a C-like data
+structure by means of
+
+ [ABSOLUTE 0]
+ stLong resd 1
+ stWord resw 1
+ stByte1 resb 1
+ stByte2 resb 1
+ st_size:
+ [SEGMENT .text]
+
+and then carry on coding. This defines `stLong' to be zero, `stWord'
+to be 4, `stByte1' to be 6, `stByte2' to be 7 and `st_size' to be 8.
+So this has defined a data structure.
+
+[EXTERN symbol] defines a symbol as being `external', in the C
+sense: `EXTERN' states that the symbol is _not_ declared in this
+module, but is declared elsewhere, and that you wish to _reference_
+it in this module.
+
+[GLOBAL symbol] defines a symbol as being global, in the sense that
+it is exported from this module and other modules may reference it.
+All symbols are local, unless declared as global. Note that the
+`GLOBAL' directive must appear before the definition of the symbol
+it refers to.
+
+[COMMON symbol size] defines a symbol as being common: it is
+declared to have the given size, and it is merged at link time with
+any declarations of the same symbol in other modules. This is not
+_fully_ supported in the `obj' file format: see the section on `obj'
+for details.
+
+Directives may also be specific to the output file format. At
+present, the `bin' and `obj' formats define extra directives, which
+are specified below.
+
+Output Formats
+==============
+
+The current output formats supported are `bin', `aout', `coff',
+`elf', `win32' and `obj'.
+
+`bin': flat-form binary
+-----------------------
+
+This is at present the only output format that generates instantly
+runnable code: all the others produce object files that need linking
+before they become executable.
+
+`bin' output files contain no red tape at all: they simply contain
+the binary representation of the exact code you wrote.
+
+The `bin' format supports a format-specific directive, which is ORG.
+[ORG addr] declares that your code should be assembled as if it were
+to be loaded into memory at the address `addr'. So a DOS .COM file
+should state [ORG 0x100], and a DOS .SYS file should state [ORG 0].
+There should be _one_ ORG directive, at most, in an assembly file:
+NASM does not support the use of ORG to jump around inside an object
+file, like MASM does (see the `Bugs' section for a use of the ORG
+directive not supported by NASM).
+
+Like all formats, the `bin' format defines the section names
+`.text', `.data' and `.bss'. The layout is that `.text' comes first
+in the output file, followed by `.data', and notionally followed by
+`.bss'. So if you declare a BSS section in a flat binary file,
+references to the BSS section will refer to space past the end of
+the actual file. The `.data' and `.bss' sections are considered to
+be aligned on four-byte boundaries: this is achieved by inserting
+padding zero bytes between the end of the text section and the start
+of the data, if there is data present. Of course if no [SECTION]
+directives are present, everything will go into `.text', and you
+will get nothing in the output except the code you wrote.
+
+`bin' silently ignores GLOBAL directives, and will also not complain
+at EXTERN ones. You only get an error if you actually _reference_ an
+external symbol.
+
+Using the `bin' format, the default output filename is `filename'
+for inputs of `filename.asm'. If there is no extension to be
+removed, output will be placed in `nasm.out' and a warning will be
+generated.
+
+`bin' defaults to 16-bit assembly mode.
+
+`aout' and `elf': Linux object files
+------------------------------------
+
+These two object formats are the ones used under Linux. They have no
+format-specific directives, and their default output filename is
+`filename.o'.
+
+ELF is a much more featureful object-file format than a.out: in
+particular it has enough features to support the writing of position
+independent code by means of a global offset table, and position
+independent shared libraries by means of a procedure linkage table.
+Unfortunately NASM, as yet, does not support these extensions, and
+so NASM cannot be used to write shared library code under ELF. NASM
+also does not support the capability, in ELF, for specifying precise
+alignment constraints on common variables.
+
+Both `aout' and `elf' default to 32-bit assembly mode.
+
+`coff' and `win32': Common Object File Format
+---------------------------------------------
+
+The `coff' format generates standard Unix COFF object files, which
+can be fed to (for example) the DJGPP linker. Its default output
+filename, like the other Unix formats, is `filename.o'.
+
+The `win32' format generates Win32 (Windows 95 or Intel-platform
+Windows NT) object files, which nominally use the COFF standard, but
+in fact are not compatible. Its default output filename is
+`filename.obj'.
+
+`coff' and `win32' are not quite compatible formats, due to the fact
+that Microsoft's interpretation of the term `relative relocation'
+does not seem to be the same as the interpretation used by anyone
+else. It is therefore more correct to state that Win32 uses a
+_variant_ of COFF. The object files will not therefore produce
+correct output when fed to each other's linkers.
+
+In addition to this subtle incompatibility, Win32 also defines
+extensions to basic COFF, such as a mechanism for importing symbols
+from dynamic-link libraries at load time. NASM may eventually
+support this extension in the form of a format-specific directive.
+However, as yet, it does not. Neither the `coff' nor `win32' output
+formats have any specific directives.
+
+The Microsoft linker also has a small blind spot: it cannot
+correctly relocate a relative CALL or JMP to an absolute address.
+Hence all PC-relative CALLs or JMPs, when using the `win32' format,
+must have targets which are relative to sections, or to external
+symbols. You can't do
+ call 0x123456
+_even_ if you happen to know that there is executable code at that
+address. The linker simply won't get the reference right; so in the
+interests of not generating incorrect code, NASM will not allow this
+form of reference to be written to a Win32 object file. (Standard
+COFF, or at least the DJGPP linker, seems to be able to cope with
+this contingency. Although that may be due to the executable having
+a zero load address.)
+
+Both `coff' and `win32' default to 32-bit assembly mode.
+
+`obj': Microsoft 16-bit Object Module Format
+--------------------------------------------
+
+The `obj' format generates 16-bit Microsoft object files, suitable
+for feeding to 16-bit versions of Microsoft C, and probably
+TLINK as well (although that hasn't been tested). The Use32
+extensions are supported.
+
+`obj' defines no special segment names: you can call segments what
+you like. Unlike the other formats, too, segment names are actually
+defined as symbols, so you can write
+
+[SEGMENT CODE]
+ mov ax,CODE
+
+and get the _segment_ address of the segment, suitable for loading
+into a segment register.
+
+Segments can be declared with attributes:
+
+[SEGMENT CODE PRIVATE ALIGN=16 CLASS=CODE OVERLAY=OVL2 USE16]
+
+You can specify segments to be PRIVATE, PUBLIC, COMMON or STACK;
+their alignment may be any power of two from 1 to 256 (although only
+1, 2, 4, 16 and 256 are really supported, so anything else gets
+rounded up to the next highest one of those); their class and
+overlay names may be specified. You may also specify segments to be
+USE16 or USE32. The defaults are PUBLIC, ALIGN=1, no class, no
+overlay, USE16.
+
+You can also specify that a segment is _absolute_ at a certain
+segment address:
+
+[SEGMENT SCREEN ABSOLUTE=0xB800]
+
+This is an alternative to the ALIGN keyword.
+
+The format-specific directive GROUP allows segment grouping: [GROUP
+DGROUP DATA BSS] defines the group DGROUP to contain segments DATA
+and BSS.
+
+Segments are defined as part of their group by default: if `var' is
+declared in segment `data', which is part of group `dgroup', then
+`SEG var' returns `dgroup', and `var' signifies the offset of `var'
+relative to the beginning of `dgroup'. You must use `var WRT data'
+to get the offset of `var' relative to the beginning of its
+_segment_.
+
+NASM allows a segment to be in two groups, but will generate a
+warning. References to the symbols in that segment will be resolved
+relative to the _first_ group it is defined in.
+
+The directive [UPPERCASE] causes all symbol, segment and group names
+output to the object file to be uppercased. The actual _assembly_ is
+still case sensitive.
+
+Common variables in OBJ files can be `near' or `far': currently,
+NASM has a horribly grotty way to support that, which is that if you
+specify the common variable's size as negative, it will be near, and
+otherwise it will be far. The support isn't perfect: if you declare
+a far common variable both in a NASM assembly module and in a C
+program, you may well find the linker reports "mismatch in
+array-size" or some such. The reason for this is that far common
+variables are defined by means of _two_ size constants, which are
+multiplied to give the real size. Apparently the Microsoft linker
+(at least) likes both constants, not merely their product, to match
+up. This may be fixed in a future release.
+
+If the module you're writing is intended to contain the program
+entry point, you can declare this by defining the special label
+`..start' at the start point, either as a label or by EQU (although
+of course the normal caveats about EQU dependency still apply).
+
+`obj' has an unusual handling of assembly modes: instead of having a
+global default for the whole file, there is a separate default for
+each segment. Thus, each [SEGMENT] directive carries an implicit
+[BITS] directive with it, which switches to 16-bit or 32-bit mode
+depending on whether the segment is a Use16 or Use32 segment. If you
+want to place 32-bit code in a Use16 segment, you can use an
+explicit [BITS 32] override, but if you switch temporarily away from
+that segment, you will have to repeat the override after coming back
+to it.
+
+`as86': Linux as86 (bin86-0.3)
+------------------------------
+
+This output format replicates the format used to pass data between
+the Linux x86 assembler and linker, as86 and ld86. Its default file
+name, yet again, is `filename.o'. Its default segment-size attribute
+is 16 bits.
+
+`rdf': Relocatable Dynamic Object File Format
+---------------------------------------------
+
+RDOFF was designed initially to test the object-file production
+interface to NASM. It soon became apparent that it could be enhanced
+for use in serious applications due to its simplicity; code to load
+and execute an RDOFF object module is very simple. It also contains
+enhancements to allow it to be linked with a dynamic link library at
+either run time or load time, depending on how complex you wish to
+make your loader.
+
+The `rdoff' directory in the NASM distribution archive contains
+source for an RDF linker and loader to run under Linux.
+
+`rdf' has a default segment-size attribute of 32 bits.
+
+Debugging format: `dbg'
+-----------------------
+
+This output format is not built into NASM by default: it's for
+debugging purposes. It produces a debug dump of everything that the
+NASM assembly module feeds to the output driver, for the benefit of
+people trying to write their own output drivers.
+
+Bugs
+====
+
+Apart from the missing features (correct OBJ COMMON support, ELF
+alignment, ELF PIC support, etc.), there are no _known_ bugs.
+However, any you find, with patches if possible, should be sent to
+<jules@dcs.warwick.ac.uk> or <anakin@pobox.com>, and we'll try to
+fix them.
+
+Beware of Pentium-specific instructions: Intel have provided a macro
+file for MASM, to implement the eight or nine new Pentium opcodes as
+MASM macros. NASM does not generate the same code for the CMPXCHG8B
+instruction as these macros do: this is due to a bug in the _macro_,
+not in NASM. The macro works by generating an SIDT instruction (if I
+remember rightly), which has almost exactly the right form, then
+using ORG to back up a bit and do a DB over the top of one of the
+opcode bytes. The trouble is that Intel overlooked (or were unable
+to allow for) the possibility that the SIDT instruction may contain
+a 0x66 or 0x67 operand-size or address-size prefix. If this happens, the
+ORG will back up by the wrong amount, and the macro will generate
+incorrect code. NASM gets it right. This, also, is not a bug in
+NASM, so please don't report it as one. (Also please note that the
+ORG directive in NASM doesn't work this way, and so you can't do
+equivalent tricks with it...)
+
+That's All Folks!
+=================
+
+Enjoy using NASM! Please feel free to send me comments, or
+constructive criticism, or bug fixes, or requests, or general chat.
+
+Contributions are also welcome: if anyone knows anything about any
+other object file formats I should support, please feel free to send
+me documentation and some short example files (in my experience,
+documentation is useless without at _least_ one example), or even to
+write me an output module. OS/2 object files, in particular, spring
+to mind. I don't have OS/2, though.
+
+Please keep flames to a minimum: I have had some very angry e-mails
+in the past, condemning me for writing a useless assembler, that
+output in no useful format (at the time, that was true), generated
+incorrect code (several typos in the instruction table, since fixed)
+and took up too much memory and disk space (the price you pay for
+total portability, it seems). All these were criticisms I was happy
+to hear, but I didn't appreciate the flames that went with them.
+NASM _is_ still a prototype, and you use it at your own risk. I
+_think_ it works, and if it doesn't then I want to know about it,
+but I don't guarantee anything. So don't flame me, please. Blame,
+but don't flame.
+
+- Simon Tatham <anakin@pobox.com>, 21-Nov-96
diff --git a/nasm.h b/nasm.h
new file mode 100644
index 0000000..9609667
--- /dev/null
+++ b/nasm.h
@@ -0,0 +1,443 @@
+/* nasm.h main header file for the Netwide Assembler: inter-module interface
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * initial version: 27/iii/95 by Simon Tatham
+ */
+
+#ifndef NASM_H
+#define NASM_H
+
+/* NASM version number: numeric components, and the same as a string. */
+#define NASM_MAJOR_VER 0
+#define NASM_MINOR_VER 91
+#define NASM_VER "0.91"
+
+/* Fallback definitions, used only if nothing has defined these yet. */
+#ifndef NULL
+#define NULL 0
+#endif
+
+#ifndef FALSE
+#define FALSE 0 /* comes in handy */
+#endif
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#define NO_SEG -1L /* null segment value */
+#define SEG_ABS 0x40000000L /* mask for far-absolute segments */
+
+/*
+ * Fallback for when FILENAME_MAX has not already been defined
+ * (normally <stdio.h> provides it).
+ */
+#ifndef FILENAME_MAX
+#define FILENAME_MAX 256
+#endif
+
+/*
+ * We must declare the existence of this structure type up here,
+ * since we have to reference it before we define it...
+ */
+struct ofmt;
+
+/*
+ * -------------------------
+ * Error reporting functions
+ * -------------------------
+ */
+
+/*
+ * An error reporting function should look like this: a severity
+ * code (see below) followed by a printf-style format string and
+ * its arguments.
+ */
+typedef void (*efunc) (int severity, char *fmt, ...);
+
+/*
+ * These are the error severity codes which get passed as the first
+ * argument to an efunc.
+ *
+ * The severity itself lives in the low four bits (ERR_MASK); the
+ * ERR_NOFILE and ERR_USAGE values are separate flag bits which may
+ * be ORed onto a severity, e.g. ERR_FATAL | ERR_NOFILE.
+ */
+
+#define ERR_WARNING 0 /* warn only: no further action */
+#define ERR_NONFATAL 1 /* terminate assembly after phase */
+#define ERR_FATAL 2 /* instantly fatal: exit with error */
+#define ERR_PANIC 3 /* internal error: panic instantly
+ * and dump core for reference */
+#define ERR_MASK 0x0F /* mask off the above codes */
+#define ERR_NOFILE 0x10 /* don't give source file name/line */
+#define ERR_USAGE 0x20 /* print a usage message */
+
+/*
+ * -----------------------
+ * Other function typedefs
+ * -----------------------
+ */
+
+/*
+ * A label-lookup function should look like this. The segment and
+ * offset of the label are passed back through the two pointers.
+ * NOTE(review): the meaning of the int return value is not stated
+ * here - presumably non-zero when the label was found; confirm
+ * against the label manager.
+ */
+typedef int (*lfunc) (char *label, long *segment, long *offset);
+
+/*
+ * And a label-definition function like this. It is handed the
+ * label's name, segment and offset, together with the output
+ * format in use and an error-reporting routine.
+ */
+typedef void (*ldfunc) (char *label, long segment, long offset,
+ struct ofmt *ofmt, efunc error);
+
+/*
+ * -----------------------------------------------------------
+ * Format of the `insn' structure returned from `parser.c' and
+ * passed into `assemble.c'
+ * -----------------------------------------------------------
+ */
+
+/*
+ * Here we define the operand types. These are implemented as bit
+ * masks, since some are subsets of others; e.g. AX in a MOV
+ * instruction is a special operand type, whereas AX in other
+ * contexts is just another 16-bit register. (Also, consider CL in
+ * shift instructions, DX in OUT, etc.)
+ */
+
+/* size, and other attributes, of the operand */
+#define BITS8 0x00000001L
+#define BITS16 0x00000002L
+#define BITS32 0x00000004L
+#define BITS64 0x00000008L /* FPU only */
+#define BITS80 0x00000010L /* FPU only */
+#define FAR 0x00000020L /* grotty: this means 16:16 or */
+ /* 16:32, like in CALL/JMP */
+#define NEAR 0x00000040L
+#define SHORT 0x00000080L /* and this means what it says :) */
+
+#define SIZE_MASK 0x000000FFL /* all the size attributes */
+#define NON_SIZE (~SIZE_MASK)
+
+#define TO 0x00000100L /* reverse effect in FADD, FSUB &c */
+#define COLON 0x00000200L /* operand is followed by a colon */
+
+/*
+ * type of operand: memory reference, register, etc.
+ *
+ * The composite values below are built by ORing bits together:
+ * MEMORY and REGNORM both contain the REGMEM bit, and REG8, REG16
+ * and REG32 are REGNORM with the matching BITSxx size bit added.
+ * MMXREG is REGISTER with BITS64 added.
+ */
+#define MEMORY 0x00204000L
+#define REGISTER 0x00001000L /* register number in 'basereg' */
+#define IMMEDIATE 0x00002000L
+
+#define REGMEM 0x00200000L /* for r/m, ie EA, operands */
+#define REGNORM 0x00201000L /* 'normal' reg, qualifies as EA */
+#define REG8 0x00201001L
+#define REG16 0x00201002L
+#define REG32 0x00201004L
+#define FPUREG 0x01000000L /* floating point stack registers */
+#define FPU0 0x01000800L /* FPU stack register zero */
+#define MMXREG 0x00001008L /* MMX registers */
+
+/*
+ * special register operands: these may be treated differently
+ *
+ * REG_SMASK covers the three bits (0x10000, 0x20000, 0x40000) that
+ * distinguish the accumulator, the counter and DX respectively.
+ */
+#define REG_SMASK 0x00070000L /* a mask for the following */
+#define REG_ACCUM 0x00211000L /* accumulator: AL, AX or EAX */
+#define REG_AL 0x00211001L /* REG_ACCUM | BITSxx */
+#define REG_AX 0x00211002L /* ditto */
+#define REG_EAX 0x00211004L /* and again */
+#define REG_COUNT 0x00221000L /* counter: CL, CX or ECX */
+#define REG_CL 0x00221001L /* REG_COUNT | BITSxx */
+#define REG_CX 0x00221002L /* ditto */
+#define REG_ECX 0x00221004L /* another one */
+#define REG_DX 0x00241002L
+#define REG_SREG 0x00081002L /* any segment register */
+#define REG_CS 0x01081002L /* CS */
+#define REG_DESS 0x02081002L /* DS, ES, SS (non-CS 86 registers) */
+#define REG_FSGS 0x04081002L /* FS, GS (386 extended registers) */
+#define REG_CDT 0x00101004L /* CRn, DRn and TRn */
+#define REG_CREG 0x08101004L /* CRn */
+#define REG_CR4 0x08101404L /* CR4 (Pentium only) */
+#define REG_DREG 0x10101004L /* DRn */
+#define REG_TREG 0x20101004L /* TRn */
+
+/* special type of EA: MEMORY with one extra bit (0x00400000) set */
+#define MEM_OFFS 0x00604000L /* simple [address] offset */
+
+/* special type of immediate operand: IMMEDIATE with 0x00800000 set */
+#define UNITY 0x00802000L /* for shift/rotate instructions */
+
+/*
+ * Next, the codes returned from the parser, for registers and
+ * instructions.
+ */
+
+/*
+ * Register codes. Numbering starts at 1, so zero never names a
+ * register. REG_ENUM_LIMIT is one greater than the last register
+ * code; the prefix enumeration further down starts from it.
+ */
+enum { /* register names */
+ R_AH = 1, R_AL, R_AX, R_BH, R_BL, R_BP, R_BX, R_CH, R_CL, R_CR0,
+ R_CR2, R_CR3, R_CR4, R_CS, R_CX, R_DH, R_DI, R_DL, R_DR0, R_DR1,
+ R_DR2, R_DR3, R_DR6, R_DR7, R_DS, R_DX, R_EAX, R_EBP, R_EBX,
+ R_ECX, R_EDI, R_EDX, R_ES, R_ESI, R_ESP, R_FS, R_GS, R_MM0,
+ R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7, R_SI, R_SP,
+ R_SS, R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7,
+ R_TR3, R_TR4, R_TR5, R_TR6, R_TR7, REG_ENUM_LIMIT
+};
+
+/*
+ * Instruction codes, starting from zero. The three conditional
+ * families (I_CMOVcc, I_Jcc, I_SETcc) come last.
+ * NOTE(review): the values are implicit, so the order here
+ * presumably has to agree with name/instruction tables kept in
+ * other files - confirm before inserting or reordering entries.
+ */
+enum { /* instruction names */
+ I_AAA, I_AAD, I_AAM, I_AAS, I_ADC, I_ADD, I_AND, I_ARPL,
+ I_BOUND, I_BSF, I_BSR, I_BSWAP, I_BT, I_BTC, I_BTR, I_BTS,
+ I_CALL, I_CBW, I_CDQ, I_CLC, I_CLD, I_CLI, I_CLTS, I_CMC, I_CMP,
+ I_CMPSB, I_CMPSD, I_CMPSW, I_CMPXCHG, I_CMPXCHG8B, I_CPUID,
+ I_CWD, I_CWDE, I_DAA, I_DAS, I_DB, I_DD, I_DEC, I_DIV, I_DQ,
+ I_DT, I_DW, I_EMMS, I_ENTER, I_EQU, I_F2XM1, I_FABS, I_FADD,
+ I_FADDP, I_FBLD, I_FBSTP, I_FCHS, I_FCLEX, I_FCMOVB, I_FCMOVBE,
+ I_FCMOVE, I_FCMOVNB, I_FCMOVNBE, I_FCMOVNE, I_FCMOVNU, I_FCMOVU,
+ I_FCOM, I_FCOMI, I_FCOMIP, I_FCOMP, I_FCOMPP, I_FCOS, I_FDECSTP,
+ I_FDISI, I_FDIV, I_FDIVP, I_FDIVR, I_FDIVRP, I_FENI, I_FFREE,
+ I_FIADD, I_FICOM, I_FICOMP, I_FIDIV, I_FIDIVR, I_FILD, I_FIMUL,
+ I_FINCSTP, I_FINIT, I_FIST, I_FISTP, I_FISUB, I_FISUBR, I_FLD,
+ I_FLD1, I_FLDCW, I_FLDENV, I_FLDL2E, I_FLDL2T, I_FLDLG2,
+ I_FLDLN2, I_FLDPI, I_FLDZ, I_FMUL, I_FMULP, I_FNOP, I_FPATAN,
+ I_FPREM, I_FPREM1, I_FPTAN, I_FRNDINT, I_FRSTOR, I_FSAVE,
+ I_FSCALE, I_FSETPM, I_FSIN, I_FSINCOS, I_FSQRT, I_FST, I_FSTCW,
+ I_FSTENV, I_FSTP, I_FSTSW, I_FSUB, I_FSUBP, I_FSUBR, I_FSUBRP,
+ I_FTST, I_FUCOM, I_FUCOMI, I_FUCOMIP, I_FUCOMP, I_FUCOMPP,
+ I_FXAM, I_FXCH, I_FXTRACT, I_FYL2X, I_FYL2XP1, I_HLT, I_ICEBP,
+ I_IDIV, I_IMUL, I_IN, I_INC, I_INSB, I_INSD, I_INSW, I_INT,
+ I_INT1, I_INT01, I_INT3, I_INTO, I_INVD, I_INVLPG, I_IRET,
+ I_IRETD, I_IRETW, I_JCXZ, I_JECXZ, I_JMP, I_LAHF, I_LAR, I_LDS,
+ I_LEA, I_LEAVE, I_LES, I_LFS, I_LGDT, I_LGS, I_LIDT, I_LLDT,
+ I_LMSW, I_LOADALL, I_LODSB, I_LODSD, I_LODSW, I_LOOP, I_LOOPE,
+ I_LOOPNE, I_LOOPNZ, I_LOOPZ, I_LSL, I_LSS, I_LTR, I_MOV, I_MOVD,
+ I_MOVQ, I_MOVSB, I_MOVSD, I_MOVSW, I_MOVSX, I_MOVZX, I_MUL,
+ I_NEG, I_NOP, I_NOT, I_OR, I_OUT, I_OUTSB, I_OUTSD, I_OUTSW,
+ I_PACKSSDW, I_PACKSSWB, I_PACKUSWB, I_PADDB, I_PADDD, I_PADDSB,
+ I_PADDSW, I_PADDUSB, I_PADDUSW, I_PADDW, I_PAND, I_PANDN,
+ I_PCMPEQB, I_PCMPEQD, I_PCMPEQW, I_PCMPGTB, I_PCMPGTD,
+ I_PCMPGTW, I_PMADDWD, I_PMULHW, I_PMULLW, I_POP, I_POPA,
+ I_POPAD, I_POPAW, I_POPF, I_POPFD, I_POPFW, I_POR, I_PSLLD,
+ I_PSLLQ, I_PSLLW, I_PSRAD, I_PSRAW, I_PSRLD, I_PSRLQ, I_PSRLW,
+ I_PSUBB, I_PSUBD, I_PSUBSB, I_PSUBSW, I_PSUBUSB, I_PSUBUSW,
+ I_PSUBW, I_PUNPCKHBW, I_PUNPCKHDQ, I_PUNPCKHWD, I_PUNPCKLBW,
+ I_PUNPCKLDQ, I_PUNPCKLWD, I_PUSH, I_PUSHA, I_PUSHAD, I_PUSHAW,
+ I_PUSHF, I_PUSHFD, I_PUSHFW, I_PXOR, I_RCL, I_RCR, I_RDMSR,
+ I_RDPMC, I_RDTSC, I_RESB, I_RESD, I_RESQ, I_REST, I_RESW, I_RET,
+ I_RETF, I_RETN, I_ROL, I_ROR, I_RSM, I_SAHF, I_SAL, I_SALC,
+ I_SAR, I_SBB, I_SCASB, I_SCASD, I_SCASW, I_SGDT, I_SHL, I_SHLD,
+ I_SHR, I_SHRD, I_SIDT, I_SLDT, I_SMSW, I_STC, I_STD, I_STI,
+ I_STOSB, I_STOSD, I_STOSW, I_STR, I_SUB, I_TEST, I_UMOV, I_VERR,
+ I_VERW, I_WAIT, I_WBINVD, I_WRMSR, I_XADD, I_XCHG, I_XLATB,
+ I_XOR, I_CMOVcc, I_Jcc, I_SETcc
+};
+
+/*
+ * Condition codes, starting from zero; one of these is stored in
+ * the `condition' member of an insn.
+ * NOTE(review): values are implicit - the order presumably matches
+ * a table elsewhere; confirm before reordering.
+ */
+enum { /* condition code names */
+ C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE,
+ C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP,
+ C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z
+};
+
+/*
+ * Note that because segment registers may be used as instruction
+ * prefixes, we must ensure the enumerations for prefixes and
+ * register names do not overlap. This is done by starting the
+ * prefix codes at REG_ENUM_LIMIT, the value just past the last
+ * register code.
+ */
+enum { /* instruction prefixes */
+ PREFIX_ENUM_START = REG_ENUM_LIMIT,
+ P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32, P_REP, P_REPE,
+ P_REPNE, P_REPNZ, P_REPZ, P_TIMES
+};
+
+/* Values for the `type' member of an extop (declared below). */
+enum { /* extended operand types */
+ EOT_NOTHING, EOT_DB_STRING, EOT_DB_NUMBER
+};
+
+/*
+ * One operand of an instruction. `type' is a combination of the
+ * operand-type bit masks defined earlier in this file.
+ */
+typedef struct { /* operand to an instruction */
+ long type; /* type of operand */
+ int addr_size; /* 0 means default; 16; 32 */
+ int basereg, indexreg, scale; /* registers and scale involved */
+ long segment; /* immediate segment, if needed */
+ long offset; /* any immediate number */
+ long wrt; /* segment base it's relative to */
+} operand;
+
+/*
+ * An extended operand: one node in a singly linked list hung off
+ * the `eops' member of an insn. `type' is one of the EOT_* values.
+ */
+typedef struct extop { /* extended operand */
+ struct extop *next; /* linked list */
+ long type; /* defined above */
+ char *stringval; /* if it's a string, then here it is */
+ int stringlen; /* ... and here's how long it is */
+ long segment; /* if it's a number/address, then... */
+ long offset; /* ... it's given here ... */
+ long wrt; /* ... and here */
+} extop;
+
+/* Capacity of the `prefixes' array in an insn. */
+#define MAXPREFIX 4
+
+/*
+ * A parsed instruction: optional label, prefixes, opcode, up to
+ * three ordinary operands, and a list of extended operands.
+ */
+typedef struct { /* an instruction itself */
+ char *label; /* the label defined, or NULL */
+ int prefixes[MAXPREFIX]; /* instruction prefixes, if any */
+ int nprefix; /* number of entries in above */
+ int opcode; /* the opcode - not just the string */
+ int condition; /* the condition code, if Jcc/SETcc */
+ int operands; /* how many operands? 0-3 */
+ operand oprs[3]; /* the operands, defined as above */
+ extop *eops; /* extended operands */
+ int times; /* repeat count (TIMES prefix) */
+} insn;
+
+/*
+ * ------------------------------------------------------------
+ * The data structure defining an output format driver, and the
+ * interfaces to the functions therein.
+ * ------------------------------------------------------------
+ */
+
+struct ofmt {
+ /*
+ * This is a short (one-liner) description of the type of
+ * output generated by the driver.
+ */
+ char *fullname;
+
+ /*
+ * This is a single keyword used to select the driver.
+ */
+ char *shortname;
+
+ /*
+ * This procedure is called at the start of an output session.
+ * It tells the output format what file it will be writing to,
+ * what routine to report errors through, and how to interface
+ * to the label manager if necessary. It also gives it a chance
+ * to do other initialisation.
+ */
+ void (*init) (FILE *fp, efunc error, ldfunc ldef);
+
+ /*
+ * This procedure is called by assemble() to write actual
+ * generated code or data to the object file. Typically it
+ * doesn't have to actually _write_ it, just store it for
+ * later.
+ *
+ * The `type' argument specifies the type of output data, and
+ * usually the size as well: its contents are described below.
+ */
+ void (*output) (long segto, void *data, unsigned long type,
+ long segment, long wrt);
+
+ /*
+ * This procedure is called once for every symbol defined in
+ * the module being assembled. It gives the name and value of
+ * the symbol, in NASM's terms, and indicates whether it has
+ * been declared to be global. Note that the parameter "name",
+ * when passed, will point to a piece of static storage
+ * allocated inside the label manager - it's safe to keep using
+ * that pointer, because the label manager doesn't clean up
+ * until after the output driver has.
+ *
+ * Values of `is_global' are: 0 means the symbol is local; 1
+ * means the symbol is global; 2 means the symbol is common (in
+ * which case `offset' holds the _size_ of the variable).
+ * Anything else is available for the output driver to use
+ * internally.
+ */
+ void (*symdef) (char *name, long segment, long offset, int is_global);
+
+ /*
+ * This procedure is called when the source code requests a
+ * segment change. It should return the corresponding segment
+ * _number_ for the name, or NO_SEG if the name is not a valid
+ * segment name.
+ *
+ * It may also be called with NULL, in which case it is to
+ * return the _default_ section number for starting assembly in.
+ *
+ * It is allowed to modify the string it is given a pointer to.
+ *
+ * It is also allowed to specify a default instruction size for
+ * the segment, by setting `*bits' to 16 or 32. Or, if it
+ * doesn't wish to define a default, it can leave `bits' alone.
+ *
+ * NOTE(review): `pass' is not described here; presumably it is
+ * the assembly pass number (1 or 2), as for `directive' below -
+ * confirm against the caller.
+ */
+ long (*section) (char *name, int pass, int *bits);
+
+ /*
+ * This procedure is called to modify the segment base values
+ * returned from the SEG operator. It is given a segment base
+ * value (i.e. a segment value with the low bit set), and is
+ * required to produce in return a segment value which may be
+ * different. It can map segment bases to absolute numbers by
+ * means of returning SEG_ABS types.
+ */
+ long (*segbase) (long segment);
+
+ /*
+ * This procedure is called to allow the output driver to
+ * process its own specific directives. When called, it has the
+ * directive word in `directive' and the parameter string in
+ * `value'. It is called in both assembly passes, and `pass'
+ * will be either 1 or 2.
+ *
+ * This procedure should return zero if it does not _recognise_
+ * the directive, so that the main program can report an error.
+ * If it recognises the directive but then has its own errors,
+ * it should report them itself and then return non-zero. It
+ * should also return non-zero if it correctly processes the
+ * directive.
+ */
+ int (*directive) (char *directive, char *value, int pass);
+
+ /*
+ * This procedure is called before anything else - even before
+ * the "init" routine - and is passed the name of the input
+ * file from which this output file is being generated. It
+ * should return its preferred name for the output file in
+ * `outname'. Since it is called before the driver is properly
+ * initialised, it has to be passed its error handler
+ * separately.
+ *
+ * This procedure may also take its own copy of the input file
+ * name for use in writing the output file: it is _guaranteed_
+ * that it will be called before the "init" routine.
+ *
+ * The parameter `outname' points to an area of storage
+ * guaranteed to be at least FILENAME_MAX in size.
+ */
+ void (*filename) (char *inname, char *outname, efunc error);
+
+ /*
+ * This procedure is called after assembly finishes, to allow
+ * the output driver to clean itself up and free its memory.
+ * Typically, it will also be the point at which the object
+ * file actually gets _written_.
+ *
+ * One thing the cleanup routine should always do is to close
+ * the output file pointer.
+ */
+ void (*cleanup) (void);
+};
+
+/*
+ * values for the `type' parameter to an output function. Each one
+ * must have the actual number of _bytes_ added to it.
+ *
+ * Exceptions are OUT_RELxADR, which denote an x-byte relocation
+ * which will be a relative jump. For this we need to know the
+ * distance in bytes from the start of the relocated record until
+ * the end of the containing instruction. _This_ is what is stored
+ * in the size part of the parameter, in this case.
+ *
+ * Also OUT_RESERVE denotes reservation of N bytes of BSS space,
+ * and the contents of the "data" parameter is irrelevant.
+ *
+ * The "data" parameter for the output function points to a "long",
+ * containing the address in question, unless the type is
+ * OUT_RAWDATA, in which case it points to an "unsigned char"
+ * array.
+ *
+ * The type code occupies the top four bits (OUT_TYPMASK) and the
+ * size/distance part the remaining 28 bits (OUT_SIZMASK).
+ */
+#define OUT_RAWDATA 0x00000000UL
+#define OUT_ADDRESS 0x10000000UL
+#define OUT_REL2ADR 0x20000000UL
+#define OUT_REL4ADR 0x30000000UL
+#define OUT_RESERVE 0x40000000UL
+#define OUT_TYPMASK 0xF0000000UL
+#define OUT_SIZMASK 0x0FFFFFFFUL
+
+/*
+ * -----
+ * Other
+ * -----
+ */
+
+/*
+ * elements(x) yields the number of elements in the statically
+ * defined array `x'. It must be given a real array, not a pointer.
+ */
+
+#define elements(x) ( sizeof(x) / sizeof((x)[0]) )
+
+#endif
diff --git a/nasmlib.c b/nasmlib.c
new file mode 100644
index 0000000..c8710b0
--- /dev/null
+++ b/nasmlib.c
@@ -0,0 +1,488 @@
+/* nasmlib.c library routines for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+
+/*
+ * Error routine used by the library routines in this file when an
+ * allocation fails or an internal check trips. It must be
+ * installed with nasm_set_malloc_error() before any of them run.
+ */
+static efunc nasm_malloc_error;
+
+void nasm_set_malloc_error (efunc error)
+{
+    nasm_malloc_error = error;
+}
+
+/*
+ * malloc() wrapper: reports a fatal "out of memory" error through
+ * the installed error routine if the allocation fails. Returns the
+ * pointer malloc() produced.
+ */
+void *nasm_malloc (size_t size)
+{
+    void *block;
+
+    block = malloc(size);
+    if (block == NULL)
+        nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
+    return block;
+}
+
+/*
+ * realloc() wrapper. A NULL `q' is handled by calling malloc()
+ * instead; failure is reported as a fatal "out of memory" error
+ * through the installed error routine.
+ */
+void *nasm_realloc (void *q, size_t size)
+{
+    void *block;
+
+    if (q != NULL)
+        block = realloc(q, size);
+    else
+        block = malloc(size);
+
+    if (block == NULL)
+        nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
+    return block;
+}
+
+/*
+ * free() wrapper which quietly ignores a NULL pointer.
+ */
+void nasm_free (void *q)
+{
+    if (q == NULL)
+        return;
+    free (q);
+}
+
+/*
+ * Return a freshly allocated copy of the NUL-terminated string
+ * `s', obtained from nasm_malloc().
+ */
+char *nasm_strdup (char *s)
+{
+    size_t bytes = strlen(s) + 1;       /* include the terminator */
+    char *copy = nasm_malloc(bytes);
+
+    memcpy (copy, s, bytes);
+    return copy;
+}
+
+/*
+ * Case-insensitive comparison of two NUL-terminated strings.
+ * Returns 0 if they are equal ignoring case, -1 if `s1' sorts
+ * before `s2' and 1 otherwise.
+ *
+ * Characters are converted to unsigned char before being handed to
+ * toupper(): passing a negative plain char is undefined behaviour.
+ * As a result characters above 0x7F compare as values 128-255, the
+ * same ordering strcmp() uses.
+ */
+int nasm_stricmp (char *s1, char *s2) {
+    int c1, c2;
+
+    for (;;) {
+        c1 = toupper((unsigned char) *s1);
+        c2 = toupper((unsigned char) *s2);
+        if (c1 == '\0' || c1 != c2)
+            break;
+        s1++, s2++;
+    }
+
+    /* c1 == c2 here can only mean both strings ended together */
+    if (c1 == c2)
+        return 0;
+    return c1 < c2 ? -1 : 1;
+}
+
+/*
+ * Case-insensitive comparison of at most `n' characters of two
+ * NUL-terminated strings. Returns 0 if the compared portions are
+ * equal ignoring case, -1 if `s1' sorts first and 1 otherwise.
+ *
+ * Characters are converted to unsigned char before being handed to
+ * toupper(): passing a negative plain char is undefined behaviour.
+ */
+int nasm_strnicmp (char *s1, char *s2, int n) {
+    while (n > 0 && *s1 &&
+           toupper((unsigned char) *s1) == toupper((unsigned char) *s2))
+        s1++, s2++, n--;
+
+    /* equal if both strings ended, or the character budget ran out */
+    if ((!*s1 && !*s2) || n == 0)
+        return 0;
+    else if (toupper((unsigned char) *s1) < toupper((unsigned char) *s2))
+        return -1;
+    else
+        return 1;
+}
+
+/*
+ * isnumchar: true for any character that may form part of a
+ * numeric constant (alphanumerics and `$'). The argument is cast to
+ * unsigned char before reaching isalnum(), since passing a negative
+ * plain char to a <ctype.h> routine is undefined.
+ * numvalue: digit value of an alphanumeric character (0-9, a-z or
+ * A-Z giving 10 upwards).
+ */
+#define isnumchar(c) ( isalnum((unsigned char)(c)) || (c) == '$')
+#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
+
+/*
+ * Convert the numeric constant at the start of `str' (after any
+ * leading white space) to a long.
+ *
+ * `*error' is set to TRUE, and 0 returned, if a character is not a
+ * valid digit in the radix selected; otherwise `*error' is FALSE.
+ * A string with no numeric characters at all yields 0 with no
+ * error. Overflow is not detected.
+ */
+long readnum (char *str, int *error) {
+    char *r = str, *q;
+    long radix;
+    long result;
+
+    *error = FALSE;
+
+    while (isspace((unsigned char) *r)) r++;   /* find start of number */
+    q = r;
+
+    while (isnumchar(*q)) q++;                 /* find end of number */
+
+    /*
+     * If it begins 0x, 0X or $, or ends in H, it's in hex. if it
+     * ends in Q, it's octal. if it ends in B, it's binary.
+     * Otherwise, it's ordinary decimal.
+     *
+     * The suffix tests look at q[-1], so they are only made when at
+     * least one character was scanned (q > r); otherwise q[-1] would
+     * lie before the number, possibly before the buffer itself.
+     */
+    if (*r=='0' && (r[1]=='x' || r[1]=='X'))
+        radix = 16, r += 2;
+    else if (*r=='$')
+        radix = 16, r++;
+    else if (q > r && (q[-1]=='H' || q[-1]=='h'))
+        radix = 16 , q--;
+    else if (q > r && (q[-1]=='Q' || q[-1]=='q'))
+        radix = 8 , q--;
+    else if (q > r && (q[-1]=='B' || q[-1]=='b'))
+        radix = 2 , q--;
+    else
+        radix = 10;
+
+    result = 0;
+    while (*r && r < q) {
+        if (*r<'0' || (*r>'9' && *r<'A') || numvalue(*r)>=radix) {
+            *error = TRUE;
+            return 0;
+        }
+        result = radix * result + numvalue(*r);
+        r++;
+    }
+    return result;
+}
+
+/* The next segment number that seg_alloc() will hand out. */
+static long next_seg;
+
+/*
+ * Restart segment numbering from zero.
+ */
+void seg_init(void)
+{
+    next_seg = 0;
+}
+
+/*
+ * Hand out the next segment number. Numbers go up in steps of two,
+ * so every value returned is even.
+ */
+long seg_alloc(void)
+{
+    long allocated = next_seg;
+
+    next_seg += 2;
+    return allocated;
+}
+
+/*
+ * Write the low 16 bits of `data' to `fp' as two bytes, least
+ * significant byte first.
+ */
+void fwriteshort (int data, FILE *fp)
+{
+    int shift;
+
+    for (shift = 0; shift < 16; shift += 8)
+        fputc ((data >> shift) & 255, fp);
+}
+
+/*
+ * Write the low 32 bits of `data' to `fp' as four bytes, least
+ * significant byte first.
+ */
+void fwritelong (long data, FILE *fp)
+{
+    int shift;
+
+    for (shift = 0; shift < 32; shift += 8)
+        fputc ((data >> shift) & 255, fp);
+}
+
+/*
+ * Build an output file name in `outname' from the input name
+ * `inname': any extension on the final component of the name is
+ * replaced by `extension' (which includes its leading period, or is
+ * empty to mean "no extension").
+ *
+ * If the input name already ends in `extension', the output would
+ * overwrite the input, so a warning is issued through `error' and
+ * the output name becomes `nasm.out' instead.
+ *
+ * Only periods after the last `/', `\' or `:' are treated as
+ * starting an extension, so a period in a directory name (as in
+ * `dir.d/file') no longer causes the path to be truncated.
+ *
+ * No bounds checking is done: `outname' must have room for
+ * `inname' plus `extension'.
+ */
+void standard_extension (char *inname, char *outname, char *extension,
+                         efunc error) {
+    char *end, *dot, *p;
+
+    strcpy (outname, inname);
+    end = outname + strlen(outname);
+
+    /* search backwards for a period in the final path component */
+    dot = NULL;
+    for (p = end; p > outname; ) {
+        --p;
+        if (*p == '.') {
+            dot = p;
+            break;
+        }
+        if (*p == '/' || *p == '\\' || *p == ':')
+            break;                     /* reached the directory part */
+    }
+    p = dot ? dot : end;               /* no extension: append at end */
+
+    if (!strcmp(p, extension)) {       /* is the extension already there? */
+        if (*extension)
+            error(ERR_WARNING | ERR_NOFILE,
+                  "file name already ends in `%s': "
+                  "output will be in `nasm.out'",
+                  extension);
+        else
+            error(ERR_WARNING | ERR_NOFILE,
+                  "file name already has no extension: "
+                  "output will be in `nasm.out'");
+        strcpy(outname, "nasm.out");
+    } else
+        strcpy(p, extension);
+}
+
+/*
+ * Random-access array of longs, held as a tree: leaf nodes carry
+ * the data itself, branch nodes carry pointers to nodes one layer
+ * further down.
+ */
+#define RAA_BLKSIZE 4096 /* this many longs allocated at once */
+#define RAA_LAYERSIZE 1024 /* this many _pointers_ allocated */
+
+typedef struct RAA RAA;
+typedef union RAA_UNION RAA_UNION;
+typedef struct RAA_LEAF RAA_LEAF;
+typedef struct RAA_BRANCH RAA_BRANCH;
+
+/*
+ * `layers' is 0 for a leaf and greater than 0 for a branch.
+ * `stepsize' is the number of array positions covered by each
+ * entry of this node's data array (a position is divided by it to
+ * pick the entry). Only the union member matching the node kind is
+ * allocated - see LEAFSIZ and BRANCHSIZ.
+ */
+struct RAA {
+ int layers;
+ long stepsize;
+ union RAA_UNION {
+ struct RAA_LEAF {
+ long data[RAA_BLKSIZE];
+ } l;
+ struct RAA_BRANCH {
+ struct RAA *data[RAA_LAYERSIZE];
+ } b;
+ } u;
+};
+
+/* Allocation sizes: the RAA header plus just the leaf or branch part. */
+#define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF))
+#define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH))
+
+/* Number of entries in node r's data array. */
+#define LAYERSIZ(r) ( (r)->layers==0 ? RAA_BLKSIZE : RAA_LAYERSIZE )
+
+/*
+ * Allocate and zero one RAA node. `layers' is 0 for a leaf, or the
+ * height above the leaves for a branch.
+ *
+ * A node's `stepsize' is the number of positions covered by each of
+ * its entries: 1 for a leaf, RAA_BLKSIZE for a branch directly above
+ * leaves (each child leaf holds RAA_BLKSIZE values), and
+ * RAA_LAYERSIZE times more for every further layer.
+ */
+static struct RAA *real_raa_init (int layers) {
+    struct RAA *r;
+
+    if (layers == 0) {
+        r = nasm_malloc (LEAFSIZ);
+        memset (r->u.l.data, 0, sizeof(r->u.l.data));
+        r->layers = 0;
+        r->stepsize = 1L;
+    } else {
+        r = nasm_malloc (BRANCHSIZ);
+        memset (r->u.b.data, 0, sizeof(r->u.b.data));
+        r->layers = layers;
+        r->stepsize = RAA_BLKSIZE;
+        while (--layers)
+            r->stepsize *= RAA_LAYERSIZE;
+    }
+    return r;
+}
+
+/*
+ * Create an empty RAA: a single leaf, every element reading as 0.
+ */
+struct RAA *raa_init (void) {
+    return real_raa_init (0);
+}
+
+/*
+ * Free an RAA: all nodes below `r', and then `r' itself (branch
+ * nodes as well as leaves). A NULL `r' is ignored.
+ */
+void raa_free (struct RAA *r) {
+    if (!r)
+        return;
+    if (r->layers > 0) {
+        struct RAA **p;
+        for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++)
+            if (*p)
+                raa_free (*p);
+    }
+    nasm_free (r);
+}
+
+/*
+ * Read element `posn'. Positions which have never been written -
+ * including negative ones and those beyond the current extent of
+ * the tree - read as 0.
+ */
+long raa_read (struct RAA *r, long posn) {
+    /*
+     * Valid positions are 0 .. stepsize*LAYERSIZ-1. The test is
+     * written as a division so that it cannot overflow a long.
+     */
+    if (posn < 0 || posn / r->stepsize >= LAYERSIZ(r))
+        return 0L;
+    while (r->layers > 0) {
+        ldiv_t l;
+        l = ldiv (posn, r->stepsize);
+        r = r->u.b.data[l.quot];
+        posn = l.rem;
+        if (!r)                        /* nothing written down here */
+            return 0L;
+    }
+    return r->u.l.data[posn];
+}
+
+/*
+ * Store `value' at element `posn', growing the tree as required.
+ * Returns the root of the tree, which may differ from `r': callers
+ * must use the returned pointer from then on.
+ *
+ * A negative `posn' is reported as a panic through the installed
+ * error routine; should that routine return, the tree is left
+ * untouched.
+ */
+struct RAA *raa_write (struct RAA *r, long posn, long value) {
+    struct RAA *result;
+
+    if (posn < 0) {
+        nasm_malloc_error (ERR_PANIC, "negative position in raa_write");
+        return r;
+    }
+
+    while (posn / r->stepsize >= LAYERSIZ(r)) {
+        /*
+         * Must go up a layer: the old root becomes entry 0 of a new
+         * branch, each of whose entries covers as many positions as
+         * the whole of the old root did.
+         */
+        struct RAA *s;
+
+        s = nasm_malloc (BRANCHSIZ);
+        memset (s->u.b.data, 0, sizeof(s->u.b.data));
+        s->layers = r->layers + 1;
+        s->stepsize = LAYERSIZ(r) * r->stepsize;
+        s->u.b.data[0] = r;
+        r = s;
+    }
+
+    result = r;
+
+    /* walk down to the leaf, creating missing nodes on the way */
+    while (r->layers > 0) {
+        ldiv_t l;
+        struct RAA **s;
+        l = ldiv (posn, r->stepsize);
+        s = &r->u.b.data[l.quot];
+        if (!*s)
+            *s = real_raa_init (r->layers - 1);
+        r = *s;
+        posn = l.rem;
+    }
+
+    r->u.l.data[posn] = value;
+
+    return result;
+}
+
+#define SAA_MAXLEN 8192
+
+/*
+ * One link of a sequential-access array. The array is a singly
+ * linked chain of fixed-size data buffers; the first link doubles as
+ * the handle passed to the saa_* routines.
+ */
+struct SAA {
+ /*
+ * members `end' and `elem_len' are only valid in first link in
+ * list; `rptr' and `rpos' are used for reading
+ */
+ struct SAA *next, *end, *rptr; /* next link; last link; link being read */
+ long elem_len, length, posn, start, rpos; /* length: buffer size; posn: bytes used in this link; start: bytes held by all earlier links; rpos: read offset within rptr */
+ char *data; /* this link's buffer, `length' bytes */
+};
+
+/*
+ * Create an empty sequential-access array whose structure-sized
+ * elements are `elem_len' bytes each (1 for a plain byte stream).
+ *
+ * The buffer length is SAA_MAXLEN rounded down to a whole number of
+ * elements. An element size that is not positive, or that exceeds
+ * SAA_MAXLEN, is reported through nasm_malloc_error with ERR_PANIC.
+ *
+ * The read cursor starts at the beginning of the array, so the
+ * reading routines see a defined `rptr' even if the caller has not
+ * yet called saa_rewind.
+ */
+struct SAA *saa_init (long elem_len) {
+    struct SAA *s;
+
+    /* elem_len is used as a modulus below, so zero must be rejected */
+    if (elem_len <= 0)
+        nasm_malloc_error (ERR_PANIC | ERR_NOFILE,
+                           "SAA with non-positive element size");
+    if (elem_len > SAA_MAXLEN)
+        nasm_malloc_error (ERR_PANIC | ERR_NOFILE, "SAA with huge elements");
+
+    s = nasm_malloc (sizeof(struct SAA));
+    s->posn = s->start = 0L;
+    s->elem_len = elem_len;
+    s->length = SAA_MAXLEN - (SAA_MAXLEN % elem_len);
+    s->data = nasm_malloc (s->length);
+    s->next = NULL;
+    s->end = s;
+    s->rptr = s;                       /* previously left uninitialised */
+    s->rpos = 0L;
+
+    return s;
+}
+
+/* Free every link of the array, together with each link's data buffer. */
+void saa_free (struct SAA *s) {
+    struct SAA *link, *following;
+
+    for (link = s; link; link = following) {
+        following = link->next;        /* fetch before the link is freed */
+        nasm_free (link->data);
+        nasm_free (link);
+    }
+}
+
+/*
+ * Reserve room for one element of `elem_len' bytes at the end of the
+ * array and return a pointer to it. A new link is appended first if
+ * the last one cannot hold a whole element.
+ */
+void *saa_wstruct (struct SAA *s) {
+    struct SAA *tail = s->end;
+    void *slot;
+
+    if (tail->length - tail->posn < s->elem_len) {
+        struct SAA *fresh = nasm_malloc (sizeof(struct SAA));
+
+        tail->next = fresh;
+        fresh->start = tail->start + tail->posn;
+        s->end = fresh;
+        fresh->length = s->length;
+        fresh->next = NULL;
+        fresh->posn = 0L;
+        fresh->data = nasm_malloc (s->length);
+        tail = fresh;
+    }
+
+    slot = tail->data + tail->posn;
+    tail->posn += s->elem_len;
+    return slot;
+}
+
+/*
+ * Append `len' bytes to the array, spilling into freshly allocated
+ * links as each one fills up. If `data' is NULL the bytes written
+ * are zeros.
+ */
+void saa_wbytes (struct SAA *s, void *data, long len) {
+    char *src = data;
+
+    while (len > 0) {
+        struct SAA *tail = s->end;
+        long room = tail->length - tail->posn;
+        long chunk = (room > len ? len : room);
+
+        if (chunk > 0) {
+            char *dest = tail->data + tail->posn;
+
+            if (src) {
+                memcpy (dest, src, chunk);
+                src += chunk;
+            } else
+                memset (dest, 0, chunk);
+            tail->posn += chunk;
+            len -= chunk;
+        }
+
+        if (len <= 0)
+            break;
+
+        /* last link is full and there is more to write: add a link */
+        tail->next = nasm_malloc (sizeof(struct SAA));
+        tail->next->start = tail->start + tail->posn;
+        s->end = tail->next;
+        s->end->length = s->length;
+        s->end->next = NULL;
+        s->end->posn = 0L;
+        s->end->data = nasm_malloc (s->length);
+    }
+}
+
+/* Move the read cursor back to the first byte of the first link. */
+void saa_rewind (struct SAA *s) {
+    s->rpos = 0L;
+    s->rptr = s;
+}
+
+/*
+ * Return a pointer to the next `elem_len'-byte element and advance
+ * the read cursor past it. Returns NULL once the cursor has run off
+ * the end of the array.
+ */
+void *saa_rstruct (struct SAA *s) {
+    struct SAA *cur = s->rptr;
+    void *elem;
+
+    if (cur == NULL)
+        return NULL;
+
+    if (cur->posn - s->rpos < s->elem_len) {
+        /* no whole element left in this link: step to the next one */
+        cur = cur->next;
+        s->rptr = cur;
+        if (cur == NULL)
+            return NULL;               /* end of array */
+        s->rpos = 0L;
+    }
+
+    elem = cur->data + s->rpos;
+    s->rpos += s->elem_len;
+    return elem;
+}
+
+/*
+ * Return a pointer to the unread remainder of the current link and
+ * store its size in `*len'; the read cursor then moves to the start
+ * of the following link. Returns NULL when there is no current link.
+ */
+void *saa_rbytes (struct SAA *s, long *len) {
+    struct SAA *cur = s->rptr;
+    char *chunk;
+
+    if (cur == NULL)
+        return NULL;
+
+    chunk = cur->data + s->rpos;
+    *len = cur->posn - s->rpos;
+    s->rpos = 0L;
+    s->rptr = cur->next;
+    return chunk;
+}
+
+/*
+ * Copy the next `len' bytes of the array into `data', crossing link
+ * boundaries as needed. Copying stops early, without any
+ * indication, if the array runs out first.
+ */
+void saa_rnbytes (struct SAA *s, void *data, long len) {
+    char *out = data;
+
+    for (;;) {
+        long avail;
+
+        if (len <= 0 || !s->rptr)
+            return;
+
+        avail = s->rptr->posn - s->rpos;
+        if (avail > len)
+            avail = len;
+        if (avail > 0) {
+            memcpy (out, s->rptr->data + s->rpos, avail);
+            out += avail;
+            s->rpos += avail;
+            len -= avail;
+        }
+        if (len > 0) {
+            /* current link exhausted: carry on from the next one */
+            s->rptr = s->rptr->next;
+            s->rpos = 0L;
+        }
+    }
+}
+
+/*
+ * Random-access read: copy `len' bytes starting at absolute byte
+ * offset `posn' of the array into `data'. Used for fixups.
+ *
+ * The read cursor is reused as a search hint: the links are singly
+ * linked, so the search restarts from the head only when `posn' lies
+ * before the current link. If `posn' is beyond the written data
+ * nothing is copied; if the array ends before `len' bytes have been
+ * copied, the copy is silently cut short.
+ */
+void saa_fread (struct SAA *s, long posn, void *data, long len) {
+    struct SAA *p;
+    long pos;
+    char *cdata = data;
+
+    /*
+     * We can only walk forwards, so rewind when the target is behind
+     * the current link. (The test used to be inverted, which left a
+     * negative offset into the link when seeking backwards.)
+     */
+    if (!s->rptr || posn < s->rptr->start)
+        saa_rewind (s);
+    while (posn >= s->rptr->start + s->rptr->posn) {
+        s->rptr = s->rptr->next;
+        if (!s->rptr)
+            return;                    /* what else can we do?! */
+    }
+
+    p = s->rptr;
+    pos = posn - p->start;
+    while (len > 0) {
+        /* use the link we are walking, not the fixed s->rptr */
+        long l = p->posn - pos;
+        if (l > len)
+            l = len;
+        memcpy (cdata, p->data+pos, l);
+        len -= l;
+        cdata += l;
+        p = p->next;
+        if (!p)
+            return;
+        pos = 0L;
+    }
+}
+
+/*
+ * Random-access write: overwrite `len' already-written bytes of the
+ * array, starting at absolute byte offset `posn', with the contents
+ * of `data'. Used for fixups; it never extends the array.
+ *
+ * The read cursor is reused as a search hint: the links are singly
+ * linked, so the search restarts from the head only when `posn' lies
+ * before the current link. If `posn' is beyond the written data
+ * nothing is changed; if the array ends before `len' bytes have been
+ * stored, the write is silently cut short.
+ */
+void saa_fwrite (struct SAA *s, long posn, void *data, long len) {
+    struct SAA *p;
+    long pos;
+    char *cdata = data;
+
+    /*
+     * We can only walk forwards, so rewind when the target is behind
+     * the current link. (The test used to be inverted, which left a
+     * negative offset into the link when seeking backwards.)
+     */
+    if (!s->rptr || posn < s->rptr->start)
+        saa_rewind (s);
+    while (posn >= s->rptr->start + s->rptr->posn) {
+        s->rptr = s->rptr->next;
+        if (!s->rptr)
+            return;                    /* what else can we do?! */
+    }
+
+    p = s->rptr;
+    pos = posn - p->start;
+    while (len > 0) {
+        /* use the link we are walking, not the fixed s->rptr */
+        long l = p->posn - pos;
+        if (l > len)
+            l = len;
+        memcpy (p->data+pos, cdata, l);
+        len -= l;
+        cdata += l;
+        p = p->next;
+        if (!p)
+            return;
+        pos = 0L;
+    }
+}
+
+/*
+ * Write the whole array to `fp', one link's worth of data at a time.
+ * The read cursor is rewound first and is left at the end of the
+ * array afterwards.
+ */
+void saa_fpwrite (struct SAA *s, FILE *fp) {
+    long nbytes;
+    char *chunk;
+
+    saa_rewind (s);
+    for (;;) {
+        chunk = saa_rbytes (s, &nbytes);
+        if (!chunk)
+            break;
+        fwrite (chunk, 1, nbytes, fp);
+    }
+}
diff --git a/nasmlib.h b/nasmlib.h
new file mode 100644
index 0000000..d827371
--- /dev/null
+++ b/nasmlib.h
@@ -0,0 +1,115 @@
+/* nasmlib.h header file for nasmlib.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_NASMLIB_H
+#define NASM_NASMLIB_H
+
+/*
+ * Wrappers around malloc, realloc and free. nasm_malloc will
+ * fatal-error and die rather than return NULL; nasm_realloc will
+ * do likewise, and will also guarantee to work right on being
+ * passed a NULL pointer; nasm_free will do nothing if it is passed
+ * a NULL pointer.
+ */
+void nasm_set_malloc_error (efunc);
+void *nasm_malloc (size_t);
+void *nasm_realloc (void *, size_t);
+void nasm_free (void *);
+char *nasm_strdup (char *);
+
+/*
+ * ANSI doesn't guarantee the presence of `stricmp' or
+ * `strcasecmp'.
+ */
+int nasm_stricmp (char *, char *);
+int nasm_strnicmp (char *, char *, int);
+
+/*
+ * Convert a string into a number, using NASM number rules. Sets
+ * `*error' to TRUE if an error occurs, and FALSE otherwise.
+ */
+long readnum(char *str, int *error);
+
+/*
+ * seg_init: Initialise the segment-number allocator.
+ * seg_alloc: allocate a hitherto unused segment number.
+ */
+void seg_init(void);
+long seg_alloc(void);
+
+/*
+ * many output formats will be able to make use of this: a standard
+ * function to add an extension to the name of the input file
+ */
+void standard_extension (char *inname, char *outname, char *extension,
+ efunc error);
+
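+/*
+ * some handy macros that will probably be of use in more than one
+ * output format: convert integers into little-endian byte packed
+ * format in memory. Each macro advances `p' past the bytes it
+ * writes, so `p' must be a modifiable byte pointer; both `p' and `v'
+ * are evaluated more than once, so neither should have side effects.
+ */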
+
+#define WRITELONG(p,v) \
+ do { \
+ *(p)++ = (v) & 0xFF; \
+ *(p)++ = ((v) >> 8) & 0xFF; \
+ *(p)++ = ((v) >> 16) & 0xFF; \
+ *(p)++ = ((v) >> 24) & 0xFF; \
+ } while (0)
+
+#define WRITESHORT(p,v) \
+ do { \
+ *(p)++ = (v) & 0xFF; \
+ *(p)++ = ((v) >> 8) & 0xFF; \
+ } while (0)
+
+/*
+ * and routines to do the same thing to a file
+ */
+void fwriteshort (int data, FILE *fp);
+void fwritelong (long data, FILE *fp);
+
+/*
+ * Routines to manage a dynamic random access array of longs which
+ * may grow in size to be more than the largest single malloc'able
+ * chunk.
+ */
+
+struct RAA;
+
+struct RAA *raa_init (void);
+void raa_free (struct RAA *);
+long raa_read (struct RAA *, long);
+struct RAA *raa_write (struct RAA *r, long posn, long value);
+
+/*
+ * Routines to manage a dynamic sequential-access array, under the
+ * same restriction on maximum mallocable block. This array may be
+ * written to in two ways: a contiguous chunk can be reserved of a
+ * given size, and a pointer returned, or single-byte data may be
+ * written. The array can also be read back in the same two ways:
+ * as a series of big byte-data blocks or as a list of structures
+ * of a given size.
+ */
+
+struct SAA;
+
+struct SAA *saa_init (long elem_len); /* 1 == byte */
+void saa_free (struct SAA *);
+void *saa_wstruct (struct SAA *); /* return a structure of elem_len */
+void saa_wbytes (struct SAA *, void *, long); /* write arbitrary bytes */
+void saa_rewind (struct SAA *); /* for reading from beginning */
+void *saa_rstruct (struct SAA *); /* return NULL on EOA */
+void *saa_rbytes (struct SAA *, long *); /* return 0 on EOA */
+void saa_rnbytes (struct SAA *, void *, long); /* read a given no. of bytes */
+void saa_fread (struct SAA *s, long posn, void *p, long len); /* fixup */
+void saa_fwrite (struct SAA *s, long posn, void *p, long len); /* fixup */
+void saa_fpwrite (struct SAA *, FILE *);
+
+#endif
diff --git a/ndisasm.c b/ndisasm.c
new file mode 100644
index 0000000..8e7a4cd
--- /dev/null
+++ b/ndisasm.c
@@ -0,0 +1,270 @@
+/* ndisasm.c the Netwide Disassembler main module
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "sync.h"
+#include "disasm.h"
+
+#define BPL 8 /* bytes per line of hex dump */
+
+/*
+ * Usage summary printed for `-h' and when no input file is given.
+ * Every option accepted on the usage line is described below it.
+ */
+static const char *help =
+"usage: ndisasm [-a] [-i] [-h] [-r] [-u] [-b bits] [-o origin] [-s sync...]\n"
+" [-e bytes] [-k start,bytes] file\n"
+" -a or -i activates auto (intelligent) sync\n"
+" -u sets USE32 (32-bit mode)\n"
+" -b 16 or -b 32 sets number of bits too\n"
+" -h displays this text\n"
+" -r displays the version number\n"
+" -o sets the origin (load address) of the file\n"
+" -s adds a sync point at the given address\n"
+" -e skips <bytes> bytes of header\n"
+" -k avoids disassembling <bytes> bytes from position <start>\n";
+
+static void output_ins (unsigned long, unsigned char *, int, char *);
+static void skip (unsigned long dist, FILE *fp);
+
+/*
+ * NDISASM entry point: parse the command line, open the input file,
+ * then disassemble it to stdout one instruction per line.
+ *
+ * Options taking a value accept it either attached (`-b32') or as the
+ * following argument (`-b 32'). Returns 0 on success or after
+ * printing help/version text, and 1 on any command-line or file
+ * error.
+ */
+int main(int argc, char **argv) {
+    unsigned char buffer[INSN_MAX * 2], *p, *q;
+    char outbuf[256];
+    char *pname = *argv;
+    char *filename = NULL;
+    unsigned long nextsync, synclen, initskip = 0L;
+    int lenread, lendis;
+    int autosync = FALSE;
+    int bits = 16;
+    int rn_error;
+    long offset;
+    FILE *fp;
+
+    offset = 0;
+    init_sync();
+
+    while (--argc) {
+        char *v, *vv, *p = *++argv;
+        if (*p == '-') {
+            p++;
+            /* the cast keeps tolower() defined for high-bit characters */
+            while (*p) switch (tolower((unsigned char)*p)) {
+              case 'a':                /* auto or intelligent sync */
+              case 'i':
+                autosync = TRUE;
+                p++;
+                break;
+              case 'h':
+                fputs(help, stderr);   /* not a format string */
+                return 0;
+              case 'r':
+                fprintf(stderr, "NDISASM version " NASM_VER "\n");
+                return 0;
+              case 'u':                /* USE32 */
+                bits = 32;
+                p++;
+                break;
+              case 'b':                /* bits */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-b' requires an argument\n", pname);
+                    return 1;
+                }
+                if (!strcmp(v, "16"))
+                    bits = 16;
+                else if (!strcmp(v, "32"))
+                    bits = 32;
+                else {
+                    fprintf(stderr, "%s: argument to `-b' should"
+                            " be `16' or `32'\n", pname);
+                    return 1;          /* was silently carrying on */
+                }
+                p = "";                /* force to next argument */
+                break;
+              case 'o':                /* origin */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-o' requires an argument\n", pname);
+                    return 1;
+                }
+                offset = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-o' requires a numeric argument\n",
+                            pname);
+                    return 1;
+                }
+                p = "";                /* force to next argument */
+                break;
+              case 's':                /* sync point */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-s' requires an argument\n", pname);
+                    return 1;
+                }
+                /* validate before registering the sync point */
+                nextsync = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-s' requires a numeric argument\n",
+                            pname);
+                    return 1;
+                }
+                add_sync (nextsync, 0L);
+                p = "";                /* force to next argument */
+                break;
+              case 'e':                /* skip a header */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-e' requires an argument\n", pname);
+                    return 1;
+                }
+                initskip = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-e' requires a numeric argument\n",
+                            pname);
+                    return 1;
+                }
+                p = "";                /* force to next argument */
+                break;
+              case 'k':                /* skip a region */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-k' requires an argument\n", pname);
+                    return 1;
+                }
+                vv = strchr(v, ',');
+                if (!vv) {
+                    fprintf(stderr, "%s: `-k' requires two numbers separated"
+                            " by a comma\n", pname);
+                    return 1;
+                }
+                *vv++ = '\0';
+                nextsync = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-k' requires numeric arguments\n",
+                            pname);
+                    return 1;
+                }
+                synclen = readnum (vv, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-k' requires numeric arguments\n",
+                            pname);
+                    return 1;
+                }
+                add_sync (nextsync, synclen);
+                p = "";                /* force to next argument */
+                break;
+              default:
+                /*
+                 * Without this case an unknown letter never advanced
+                 * `p', so the enclosing loop spun forever.
+                 */
+                fprintf(stderr, "%s: unrecognised option `-%c'\n",
+                        pname, *p);
+                return 1;
+            }
+        } else if (!filename) {
+            filename = p;
+        } else {
+            fprintf(stderr, "%s: more than one filename specified\n", pname);
+            return 1;
+        }
+    }
+
+    if (!filename) {
+        fputs(help, stderr);
+        return 0;
+    }
+
+    fp = fopen(filename, "rb");
+    if (!fp) {
+        fprintf(stderr, "%s: unable to open input file `%s'\n",
+                pname, filename);
+        return 1;
+    }
+    if (initskip > 0)
+        skip (initskip, fp);
+
+    /*
+     * This main loop is really horrible, and wants rewriting with
+     * an axe. It'll stay the way it is for a while though, until I
+     * find the energy...
+     */
+
+    p = q = buffer;
+    nextsync = next_sync (offset, &synclen);
+    do {
+        /* never read past the next sync point */
+        unsigned long to_read = buffer+sizeof(buffer)-p;
+        if (to_read > nextsync-offset-(p-q))
+            to_read = nextsync-offset-(p-q);
+        lenread = fread (p, 1, to_read, fp);
+        p += lenread;
+        if (offset == nextsync) {
+            if (synclen) {
+                printf("%08lX skipping 0x%lX bytes\n", offset, synclen);
+                offset += synclen;
+                skip (synclen, fp);
+            }
+            p = q = buffer;
+            nextsync = next_sync (offset, &synclen);
+        }
+        while (p > q && (p - q >= INSN_MAX || lenread == 0)) {
+            lendis = disasm (q, outbuf, bits, offset, autosync);
+            /* fall back to a single byte if the insn would overrun */
+            if (!lendis || lendis > (p - q) ||
+                lendis > nextsync-offset)
+                lendis = eatbyte (q, outbuf);
+            output_ins (offset, q, lendis, outbuf);
+            q += lendis;
+            offset += lendis;
+        }
+        if (q >= buffer+INSN_MAX) {
+            /* slide the unconsumed bytes back to the buffer start */
+            unsigned char *r = buffer, *s = q;
+            int count = p - q;
+            while (count--)
+                *r++ = *s++;
+            p -= (q - buffer);
+            q = buffer;
+        }
+    } while (lenread > 0 || !feof(fp));
+    fclose (fp);
+    return 0;
+}
+
+/*
+ * Print one disassembled instruction: the offset, up to BPL bytes of
+ * hex dump, padding, and the instruction text `insn'. Any remaining
+ * bytes of the `datalen'-byte instruction at `data' follow on
+ * continuation lines of at most BPL bytes each.
+ */
+static void output_ins (unsigned long offset, unsigned char *data,
+                        int datalen, char *insn) {
+    int col;
+
+    printf("%08lX ", offset);
+
+    for (col = 0; datalen > 0 && col < BPL; col++, datalen--)
+        printf("%02X", *data++);
+
+    /* pad the hex column so that the instruction text lines up */
+    printf("%*s%s\n", (BPL+1-col)*2, "", insn);
+
+    while (datalen > 0) {
+        printf(" -");
+        for (col = 0; datalen > 0 && col < BPL; col++, datalen--)
+            printf("%02X", *data++);
+        printf("\n");
+    }
+}
+
+/*
+ * Skip a certain amount of data in a file, either by seeking if
+ * possible, or if that fails then by reading and discarding.
+ */
+static void skip (unsigned long dist, FILE *fp) {
+    char scratch[256];                 /* should fit on most stacks :-) */
+    unsigned long chunk;
+
+    /*
+     * Got to be careful with fseek: at least one fseek I've tried
+     * doesn't approve of SEEK_CUR. So I'll use SEEK_SET and
+     * ftell... horrible but apparently necessary.
+     */
+    if (fseek (fp, dist+ftell(fp), SEEK_SET) == 0)
+        return;
+
+    /* seeking failed: read the data and throw it away instead */
+    for (; dist > 0; dist -= chunk) {
+        chunk = (dist < sizeof(scratch) ? dist : sizeof(scratch));
+        if (fread (scratch, 1, chunk, fp) < chunk) {
+            perror("fread");
+            exit(1);
+        }
+    }
+}
diff --git a/ndisasm.doc b/ndisasm.doc
new file mode 100644
index 0000000..5b5374a
--- /dev/null
+++ b/ndisasm.doc
@@ -0,0 +1,199 @@
+ The Netwide Disassembler, NDISASM
+ =================================
+
+Introduction
+============
+
+The Netwide Disassembler is a small companion program to the Netwide
+Assembler, NASM. It seemed a shame to have an x86 assembler,
+complete with a full instruction table, and not make as much use of
+it as possible, so here's a disassembler which shares the
+instruction table (and some other bits of code) with NASM.
+
+The Netwide Disassembler does nothing except produce disassemblies
+of _binary_ source files. Unlike `objdump', NDISASM has no
+understanding of object file formats, and unlike `debug' it will
+not understand DOS .EXE files. It just disassembles.
+
+Getting Started: Installation
+=============================
+
+See `nasm.doc' for installation instructions. NDISASM, like NASM,
+has a man page which you may want to put somewhere useful, if you
+are on a Unix system.
+
+Running NDISASM
+===============
+
+To disassemble a file, you will typically use a command of the form
+
+ ndisasm [-b16 | -b32] filename
+
+NDISASM can disassemble 16 bit code or 32 bit code equally easily,
+provided of course that you remember to specify which it is to work
+with. If no `-b' switch is present, NDISASM works in 16-bit mode by
+default. The `-u' switch (for USE32) also invokes 32-bit mode.
+
+Two more command line options are `-r' which reports the version
+number of NDISASM you are running, and `-h' which gives a short
+summary of command line options.
+
+COM Files: Specifying an Origin
+===============================
+
+To disassemble a DOS .COM file correctly, a disassembler must assume
+that the first instruction in the file is loaded at address 0x100,
+rather than at zero. NDISASM, which assumes by default that any file
+you give it is loaded at zero, will therefore need to be informed of
+this.
+
+The `-o' option allows you to declare a different origin for the
+file you are disassembling. Its argument may be expressed in any of
+the NASM numeric formats: decimal by default, if it begins with `$'
+or `0x' or ends in `H' it's hex, if it ends in `Q' it's octal, and
+if it ends in `B' it's binary.
+
+Hence, to disassemble a .COM file:
+
+ ndisasm -o100h filename.com
+
+will do the trick.
+
+Code Following Data: Synchronisation
+====================================
+
+Suppose you are disassembling a file which contains some data which
+isn't machine code, and _then_ contains some machine code. NDISASM
+will faithfully plough through the data section, producing machine
+instructions wherever it can (although most of them will look
+bizarre, and some may have unusual prefixes, e.g. `fs or
+ax,0x240a'), and generating `db' instructions every so often if it's
+totally stumped. Then it will reach the code section.
+
+Supposing NDISASM has just finished generating a strange machine
+instruction from part of the data section, and its file position is
+now one byte _before_ the beginning of the code section. It's
+entirely possible that another spurious instruction will get
+generated, starting with the final byte of the data section, and
+then the correct first instruction in the code section will not be
+seen because the starting point skipped over it. This isn't really
+ideal.
+
+To avoid this, you can specify a `synchronisation' point, or indeed
+as many synchronisation points as you like (although NDISASM can
+only handle 8192 sync points internally). The definition of a sync
+point is this: NDISASM guarantees to hit sync points exactly during
+disassembly. If it is thinking about generating an instruction which
+would cause it to jump over a sync point, it will discard that
+instruction and output a `db' instead. So it _will_ start
+disassembly exactly from the sync point, and so you _will_ see all
+the instructions in your code section.
+
+Sync points are specified using the `-s' option: they are measured
+in terms of the program origin, not the file position. So if you
+want to synchronise after 32 bytes of a .COM file, you would have to
+do
+
+ ndisasm -o100h -s120h file.com
+
+rather than
+
+ ndisasm -o100h -s20h file.com
+
+As stated above, you can specify multiple sync markers if you need
+to, just by repeating the `-s' option.
+
+Mixed Code and Data: Automatic (Intelligent) Synchronisation
+============================================================
+
+Suppose you are disassembling the boot sector of a DOS floppy (maybe
+it has a virus, and you need to understand the virus so that you
+know what kinds of damage it might have done you). Typically, this
+will contain a JMP instruction, then some data, then the rest of the
+code. So there is a very good chance of NDISASM being misaligned
+when the data ends and the code begins. Hence a sync point is
+needed.
+
+On the other hand, why should you have to specify the sync point
+manually? What you'd do in order to find where the sync point would
+be, surely, would be to read the JMP instruction, and then to use
+its target address as a sync point. So can NDISASM do that for you?
+
+The answer, of course, is yes: using either of the synonymous
+switches `-a' (for automatic sync) or `-i' (for intelligent sync)
+will enable auto-sync mode. Auto-sync mode automatically generates a
+sync point for any forward-referring PC-relative jump or call
+instruction that NDISASM encounters. (Since NDISASM is one-pass, if
+it encounters a PC-relative jump whose target has already been
+processed, there isn't much it can do about it...)
+
+Only PC-relative jumps are processed, since an absolute jump is
+either through a register (in which case NDISASM doesn't know what
+the register contains) or involves a segment address (in which case
+the target code isn't in the same segment that NDISASM is working
+in, and so the sync point can't be placed anywhere useful).
+
+For some kinds of file, this mechanism will automatically put sync
+points in all the right places, and save you from having to place
+any sync points manually. However, it should be stressed that
+auto-sync mode is _not_ guaranteed to catch all the sync points, and
+you may still have to place some manually.
+
+Auto-sync mode doesn't prevent you from declaring manual sync
+points: it just adds automatically generated ones to the ones you
+provide. It's perfectly feasible to specify `-i' _and_ some `-s'
+options.
+
+Another caveat with auto-sync mode is that if, by some unpleasant
+fluke, something in your data section should disassemble to a
+PC-relative call or jump instruction, NDISASM may obediently place a
+sync point in a totally random place, for example in the middle of
+one of the instructions in your code section. So you may end up with
+a wrong disassembly even if you use auto-sync. Again, there isn't
+much I can do about this. If you have problems, you'll have to use
+manual sync points, or use the `-k' option (documented below) to
+suppress disassembly of the data area.
+
+Other Options
+=============
+
+The `-e' option skips a header on the file, by ignoring the first N
+bytes. This means that the header is _not_ counted towards the
+disassembly offset: if you give `-e10 -o10', disassembly will start
+at byte 10 in the file, and this will be given offset 10, not 20.
+
+The `-k' option is provided with two comma-separated numeric
+arguments, the first of which is an assembly offset and the second
+is a number of bytes to skip. This _will_ count the skipped bytes
+towards the assembly offset: its use is to suppress disassembly of a
+data section which wouldn't contain anything you wanted to see
+anyway.
+
+Bugs and Improvements
+=====================
+
+There are no known bugs. However, any you find, with patches if
+possible, should be sent to <jules@dcs.warwick.ac.uk> or
+<anakin@pobox.com>, and we'll try to fix them. Feel free to send
+contributions and new features as well.
+
+Future plans include awareness of which processors certain
+instructions will run on, and marking of instructions that are too
+advanced for some processor (or are FPU instructions, or are
+undocumented opcodes, or are privileged protected-mode instructions,
+or whatever).
+
+That's All Folks!
+=================
+
+I hope NDISASM is of some use to somebody. Including me. :-)
+
+I don't recommend taking NDISASM apart to see how an efficient
+disassembler works, because as far as I know, it isn't an efficient
+one anyway. You have been warned.
+
+Please feel free to send comments, suggestions, or chat to
+<anakin@pobox.com>. As with NASM, no flames please.
+
+- Simon Tatham <anakin@pobox.com>, 21-Nov-96
diff --git a/outaout.c b/outaout.c
new file mode 100644
index 0000000..e0ada3c
--- /dev/null
+++ b/outaout.c
@@ -0,0 +1,466 @@
+/* outaout.c output routines for the Netwide Assembler to produce
+ * Linux a.out object files
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_AOUT
+
+struct Reloc {
+ struct Reloc *next;
+ long address; /* relative to _start_ of section */
+ long symbol; /* symbol number or -ve section id */
+ int bytes; /* 2 or 4 */
+ int relative; /* TRUE or FALSE */
+};
+
+struct Symbol {
+ long strpos; /* string table position of name */
+ int type; /* symbol type - see flags below */
+ long value; /* address, or COMMON variable size */
+};
+
+/*
+ * Section IDs - used in Reloc.symbol when negative, and in
+ * Symbol.type when positive.
+ */
+#define SECT_ABS 2 /* absolute value */
+#define SECT_TEXT 4 /* text section */
+#define SECT_DATA 6 /* data section */
+#define SECT_BSS 8 /* bss section */
+#define SECT_MASK 0xE /* mask out any of the above */
+
+/*
+ * Another flag used in Symbol.type.
+ */
+#define SYM_GLOBAL 1 /* it's a global symbol */
+
+/*
+ * Bit more explanation of symbol types: SECT_xxx denotes a local
+ * symbol. SECT_xxx|SYM_GLOBAL denotes a global symbol, defined in
+ * this module. Just SYM_GLOBAL, with zero value, denotes an
+ * external symbol referenced in this module. And just SYM_GLOBAL,
+ * but with a non-zero value, declares a C `common' variable, of
+ * size `value'.
+ */
+
+/*
+ * State kept for each of the two sections that carry data (text and
+ * data); BSS needs only a length and is tracked separately.
+ */
+struct Section {
+ struct SAA *data; /* section contents, as a byte-wise SAA */
+ unsigned long len, size, nrelocs; /* len: bytes written so far; nrelocs: relocations recorded; size: only ever zeroed in the code visible here */
+ long index; /* segment number obtained from seg_alloc() */
+ struct Reloc *head, **tail; /* relocation list and its append point */
+};
+
+static struct Section stext, sdata;
+static unsigned long bsslen;
+static long bssindex;
+
+static struct SAA *syms;
+static unsigned long nsyms;
+
+static struct RAA *bsym;
+
+static struct SAA *strs;
+static unsigned long strslen;
+
+static FILE *aoutfp;
+static efunc error;
+
+static void aout_write(void);
+static void aout_write_relocs(struct Reloc *);
+static void aout_write_syms(void);
+static void aout_sect_write(struct Section *, unsigned char *, unsigned long);
+static void aout_pad_sections(void);
+static void aout_fixup_relocs(struct Section *);
+
+/*
+ * Prepare the a.out back end: remember the output file and error
+ * routine, create empty text and data sections, allocate segment
+ * numbers for text, data and BSS (in that order), and create the
+ * empty symbol table, external-symbol map and string table.
+ */
+static void aout_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    (void) ldef;                       /* placate optimisers */
+    aoutfp = fp;
+    error = errfunc;
+
+    stext.data = saa_init(1L);
+    stext.head = NULL;
+    stext.tail = &stext.head;
+
+    sdata.data = saa_init(1L);
+    sdata.head = NULL;
+    sdata.tail = &sdata.head;
+
+    stext.len = stext.size = 0;
+    sdata.len = sdata.size = 0;
+    bsslen = 0;
+    stext.nrelocs = 0;
+    sdata.nrelocs = 0;
+
+    stext.index = seg_alloc();
+    sdata.index = seg_alloc();
+    bssindex = seg_alloc();
+
+    syms = saa_init((long)sizeof(struct Symbol));
+    nsyms = 0;
+    bsym = raa_init();
+    strs = saa_init(1L);
+    strslen = 0;
+}
+
+/* Free every record of a relocation list, leaving `*head' NULL. */
+static void aout_free_relocs (struct Reloc **head) {
+    while (*head) {
+        struct Reloc *doomed = *head;
+        *head = doomed->next;
+        nasm_free (doomed);
+    }
+}
+
+/*
+ * Finish the object file: pad the sections, patch up relocated
+ * values, write everything out, close the file and release all the
+ * memory the back end allocated.
+ */
+static void aout_cleanup(void) {
+    aout_pad_sections();
+    aout_fixup_relocs(&stext);
+    aout_fixup_relocs(&sdata);
+    aout_write();
+    fclose (aoutfp);
+
+    saa_free (stext.data);
+    aout_free_relocs (&stext.head);
+    saa_free (sdata.data);
+    aout_free_relocs (&sdata.head);
+
+    saa_free (syms);
+    raa_free (bsym);
+    saa_free (strs);
+}
+
+/*
+ * Map a section name to its segment number. A NULL name selects the
+ * default section (text) and also sets `*bits' to 32; `.text',
+ * `.data' and `.bss' map to their segments, and any other name
+ * yields NO_SEG. `*bits' is left untouched for non-NULL names.
+ */
+static long aout_section_names (char *name, int pass, int *bits) {
+    if (name == NULL) {
+        *bits = 32;                    /* default to 32 bits */
+        return stext.index;
+    }
+
+    if (strcmp(name, ".text") == 0)
+        return stext.index;
+    if (strcmp(name, ".data") == 0)
+        return sdata.index;
+    if (strcmp(name, ".bss") == 0)
+        return bssindex;
+    return NO_SEG;
+}
+
+/*
+ * Record a label in the symbol and string tables. Names beginning
+ * with `..' are ignored. A symbol in any segment other than
+ * absolute, text, data or BSS is stored as a bare SYM_GLOBAL
+ * (external), and its segment number is mapped to the new symbol's
+ * index so that relocations can refer to it.
+ */
+static void aout_deflabel (char *name, long segment, long offset,
+                           int is_global) {
+    int pos = strslen+4;               /* the 4-byte length word comes first */
+    int type, external = 0;
+    struct Symbol *sym;
+
+    if (name[0] == '.' && name[1] == '.')
+        return;
+
+    saa_wbytes (strs, name, (long)(1+strlen(name)));
+    strslen += 1+strlen(name);
+
+    type = (is_global ? SYM_GLOBAL : 0);
+    if (segment == NO_SEG)
+        type |= SECT_ABS;
+    else if (segment == stext.index)
+        type |= SECT_TEXT;
+    else if (segment == sdata.index)
+        type |= SECT_DATA;
+    else if (segment == bssindex)
+        type |= SECT_BSS;
+    else {
+        type = SYM_GLOBAL;
+        external = 1;
+    }
+
+    sym = saa_wstruct (syms);
+    sym->strpos = pos;
+    sym->type = type;
+    if (is_global == 2 || type != SYM_GLOBAL)
+        sym->value = offset;
+    else
+        sym->value = 0;
+
+    /*
+     * define the references from external-symbol segment numbers
+     * to these symbol records.
+     */
+    if (external)
+        bsym = raa_write (bsym, segment, nsyms);
+
+    nsyms++;
+}
+
+/*
+ * Append a relocation record to `sect' for the `bytes'-wide field
+ * about to be written at the section's current length. The target
+ * is stored as a negated SECT_xxx code for the absolute, text, data
+ * and BSS segments, and otherwise as the symbol number looked up
+ * from the segment number.
+ */
+static void aout_add_reloc (struct Section *sect, long segment,
+                            int relative, int bytes) {
+    struct Reloc *rel = nasm_malloc(sizeof(struct Reloc));
+
+    *sect->tail = rel;
+    sect->tail = &rel->next;
+    rel->next = NULL;
+
+    rel->address = sect->len;
+    if (segment == NO_SEG)
+        rel->symbol = -SECT_ABS;
+    else if (segment == stext.index)
+        rel->symbol = -SECT_TEXT;
+    else if (segment == sdata.index)
+        rel->symbol = -SECT_DATA;
+    else if (segment == bssindex)
+        rel->symbol = -SECT_BSS;
+    else
+        rel->symbol = raa_read(bsym, segment);
+    rel->relative = relative;
+    rel->bytes = bytes;
+
+    sect->nrelocs++;
+}
+
+/*
+ * Emit one item of assembler output into segment `segto'.
+ *
+ * `type' combines an OUT_xxx kind (OUT_TYPMASK) with a byte count
+ * (OUT_SIZMASK); `data' points at raw bytes for OUT_RAWDATA and at a
+ * long for the address kinds; `segment' is the segment the value
+ * refers to, or NO_SEG. WRT is not supported: it is reported and
+ * then ignored.
+ *
+ * Output aimed at an unknown segment goes to `.text' with a warning.
+ * Data aimed at BSS only advances the BSS length. Reserved space in
+ * text or data is written as zeros.
+ */
+static void aout_out (long segto, void *data, unsigned long type,
+                      long segment, long wrt) {
+    struct Section *s;
+    long realbytes = type & OUT_SIZMASK;
+    unsigned char mydata[4], *p;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by a.out output format");
+    }
+
+    type &= OUT_TYPMASK;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if (type != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    if (segto == stext.index)
+        s = &stext;
+    else if (segto == sdata.index)
+        s = &sdata;
+    else if (segto == bssindex)
+        s = NULL;
+    else {
+        /* segto is a long, so it needs %ld rather than %d */
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %ld: defaulting to `.text'", segto);
+        s = &stext;
+    }
+
+    if (!s && type != OUT_RESERVE) {
+        error(ERR_WARNING, "attempt to initialise memory in the"
+              " BSS section: ignored");
+        /* relative kinds always occupy a fixed 2 or 4 bytes */
+        if (type == OUT_REL2ADR)
+            realbytes = 2;
+        else if (type == OUT_REL4ADR)
+            realbytes = 4;
+        bsslen += realbytes;
+        return;
+    }
+
+    if (type == OUT_RESERVE) {
+        if (s) {
+            error(ERR_WARNING, "uninitialised space declared in"
+                  " %s section: zeroing",
+                  (segto == stext.index ? "code" : "data"));
+            aout_sect_write (s, NULL, realbytes);
+        } else
+            bsslen += realbytes;
+    } else if (type == OUT_RAWDATA) {
+        if (segment != NO_SEG)
+            error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
+        aout_sect_write (s, data, realbytes);
+    } else if (type == OUT_ADDRESS) {
+        if (segment != NO_SEG) {
+            if (segment % 2) {
+                error(ERR_NONFATAL, "a.out format does not support"
+                      " segment base references");
+            } else
+                aout_add_reloc (s, segment, FALSE, realbytes);
+        }
+        p = mydata;
+        if (realbytes == 2)
+            WRITESHORT (p, *(long *)data);
+        else
+            WRITELONG (p, *(long *)data);
+        aout_sect_write (s, mydata, realbytes);
+    } else if (type == OUT_REL2ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL2ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "a.out format does not support"
+                  " segment base references");
+        } else
+            aout_add_reloc (s, segment, TRUE, 2);
+        p = mydata;
+        WRITESHORT (p, *(long*)data-(realbytes + s->len));
+        aout_sect_write (s, mydata, 2L);
+    } else if (type == OUT_REL4ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL4ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "a.out format does not support"
+                  " segment base references");
+        } else
+            aout_add_reloc (s, segment, TRUE, 4);
+        p = mydata;
+        WRITELONG (p, *(long*)data-(realbytes + s->len));
+        aout_sect_write (s, mydata, 4L);
+    }
+}
+
+static void aout_pad_sections(void) {
+    static unsigned char pad[] = { 0x90, 0x90, 0x90, 0x90 };
+    unsigned long fill;
+
+    /*
+     * Pad each of the text and data sections with NOPs until their
+     * length is a multiple of four. (NOP == 0x90.) Also increase
+     * the length of the BSS section similarly.
+     */
+    fill = (-stext.len) & 3;
+    aout_sect_write (&stext, pad, fill);
+
+    fill = (-sdata.len) & 3;
+    aout_sect_write (&sdata, pad, fill);
+
+    bsslen = (bsslen + 3) & ~3;
+}
+
+/*
+ * a.out files have the curious property that all references to
+ * things in the data or bss sections are done by addresses which
+ * are actually relative to the start of the _text_ section, in the
+ * _file_. (No relation to what happens after linking. No idea why
+ * this should be so. It's very strange.) So we have to go through
+ * the relocation table, _after_ the final size of each section is
+ * known, and fix up the relocations pointed to.
+ */
+static void aout_fixup_relocs(struct Section *sect) {
+    struct Reloc *rel;
+
+    saa_rewind (sect->data);
+    for (rel = sect->head; rel; rel = rel->next) {
+        unsigned char blk[4], *wp;
+        long val;
+        int wide = (rel->bytes == 4);
+
+        /* fetch the little-endian value currently stored there */
+        saa_fread (sect->data, rel->address, blk, (long)rel->bytes);
+        val = blk[0];
+        val += ((long)blk[1]) << 8;
+        if (wide) {
+            val += ((long)blk[2]) << 16;
+            val += ((long)blk[3]) << 24;
+        }
+
+        /* rebase data/bss references to the start of the text */
+        if (rel->symbol == -SECT_DATA)
+            val += stext.len;
+        else if (rel->symbol == -SECT_BSS)
+            val += stext.len + sdata.len;
+
+        /* and store the adjusted value back in place */
+        wp = blk;
+        if (wide)
+            WRITELONG(wp, val);
+        else
+            WRITESHORT(wp, val);
+        saa_fwrite (sect->data, rel->address, blk, (long)rel->bytes);
+    }
+}
+
+/*
+ * Write the complete object file: header, text and data contents,
+ * both relocation tables, the symbol table and the string table.
+ */
+static void aout_write(void) {
+    FILE *fp = aoutfp;
+
+    /*
+     * Emit the a.out header.
+     */
+    fwritelong (0x640107, fp);          /* OMAGIC, M_386, no flags */
+    fwritelong (stext.len, fp);
+    fwritelong (sdata.len, fp);
+    fwritelong (bsslen, fp);
+    fwritelong (nsyms * 12, fp);        /* length of symbol table */
+    fwritelong (0L, fp);                /* object files have no entry point */
+    fwritelong (stext.nrelocs * 8, fp); /* size of text relocs */
+    fwritelong (sdata.nrelocs * 8, fp); /* size of data relocs */
+
+    /*
+     * Write out the code section and the data section.
+     */
+    saa_fpwrite (stext.data, fp);
+    saa_fpwrite (sdata.data, fp);
+
+    /*
+     * Write out the relocations.
+     */
+    aout_write_relocs (stext.head);
+    aout_write_relocs (sdata.head);
+
+    /*
+     * Write the symbol table.
+     */
+    aout_write_syms ();
+
+    /*
+     * And the string table.
+     */
+    fwritelong (strslen+4, fp);         /* length includes length count */
+    saa_fpwrite (strs, fp);
+}
+
+/*
+ * Write a relocation list to the output file as pairs of 32-bit
+ * words: the address, then a word combining the symbol number (or
+ * section code) with flag bits for external, relative and width.
+ */
+static void aout_write_relocs (struct Reloc *r) {
+    for (; r; r = r->next) {
+        unsigned long info;
+
+        fwritelong (r->address, aoutfp);
+
+        /* non-negative means a symbol number: set the `external' bit */
+        info = (r->symbol >= 0 ? (r->symbol | 0x8000000) : -r->symbol);
+        if (r->relative)
+            info |= 0x1000000;
+        if (r->bytes == 2)
+            info |= 0x2000000;
+        else
+            info |= 0x4000000;
+        fwritelong (info, aoutfp);
+    }
+}
+
+/*
+ * Write the symbol table: three 32-bit words per symbol (string
+ * position, type, value). Data and BSS symbol values are rebased in
+ * place before being written.
+ */
+static void aout_write_syms (void) {
+    unsigned long remaining;
+
+    saa_rewind (syms);
+    for (remaining = nsyms; remaining > 0; remaining--) {
+        struct Symbol *sym = saa_rstruct(syms);
+
+        fwritelong (sym->strpos, aoutfp);
+        fwritelong ((long)sym->type, aoutfp);
+        /*
+         * Fix up the symbol value now we know the final section
+         * sizes.
+         */
+        switch (sym->type & SECT_MASK) {
+          case SECT_DATA:
+            sym->value += stext.len;
+            break;
+          case SECT_BSS:
+            sym->value += stext.len + sdata.len;
+            break;
+        }
+        fwritelong (sym->value, aoutfp);
+    }
+}
+
+/*
+ * Append `len' bytes to a section's contents and keep its running
+ * length in step. `data' is handed straight to saa_wbytes.
+ */
+static void aout_sect_write (struct Section *sect,
+                             unsigned char *data, unsigned long len) {
+    sect->len += len;
+    saa_wbytes (sect->data, data, len);
+}
+
+/* Segment-base hook: this format hands the segment number back unchanged. */
+static long aout_segbase (long segment) {
+    long base = segment;
+
+    return base;
+}
+
+/* Directive hook: always returns 0 and ignores all of its arguments. */
+static int aout_directive (char *directive, char *value, int pass) {
+    (void) directive;                  /* placate optimisers */
+    (void) value;
+    (void) pass;
+    return 0;
+}
+
+/* Derive the output file name from the input name, using extension `.o'. */
+static void aout_filename (char *inname, char *outname, efunc error) {
+    char *ext = ".o";
+
+    standard_extension (inname, outname, ext, error);
+}
+
+/*
+ * Output-format descriptor for a.out: a description string, the
+ * short format name, then the callbacks defined in this file.
+ * NOTE(review): the member order presumably mirrors struct ofmt as
+ * declared in nasm.h -- confirm there before reordering.
+ */
+struct ofmt of_aout = {
+ "GNU a.out (i386) object files (e.g. Linux)",
+ "aout",
+ aout_init,
+ aout_out,
+ aout_deflabel,
+ aout_section_names,
+ aout_segbase,
+ aout_directive,
+ aout_filename,
+ aout_cleanup
+};
+
+#endif /* OF_AOUT */
diff --git a/outas86.c b/outas86.c
new file mode 100644
index 0000000..82dedb2
--- /dev/null
+++ b/outas86.c
@@ -0,0 +1,548 @@
+/* outas86.c output routines for the Netwide Assembler to produce
+ * Linux as86 (bin86-0.3) object files
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_AS86
+
+struct Piece {
+ struct Piece *next;
+ int type; /* 0 = absolute, 1 = seg, 2 = sym */
+ long offset; /* relative offset */
+ int number; /* symbol/segment number (4=bss) */
+ long bytes; /* size of reloc or of absolute data */
+ int relative; /* TRUE or FALSE */
+};
+
+struct Symbol {
+ long strpos; /* string table position of name */
+ int flags; /* symbol flags */
+ int segment; /* 4=bss at this point */
+ long value; /* address, or COMMON variable size */
+};
+
+/*
+ * Section IDs - used in Piece.number and Symbol.segment.
+ */
+#define SECT_TEXT 0 /* text section */
+#define SECT_DATA 3 /* data section */
+#define SECT_BSS 4 /* bss section */
+
+/*
+ * Flags used in Symbol.flags.
+ */
+#define SYM_ENTRY (1<<8)
+#define SYM_EXPORT (1<<7)
+#define SYM_IMPORT (1<<6)
+#define SYM_ABSOLUTE (1<<4)
+
+/*
+ * Per-section state, used for .text and .data: the raw contents, the
+ * NASM segment index, and the list of Pieces describing the contents.
+ * Within this file `len' is advanced by as86_add_piece(), `datalen'
+ * by as86_sect_write(), and `size' is only ever zeroed in as86_init().
+ */
+struct Section {
+ struct SAA *data;
+ unsigned long datalen, size, len;
+ long index;
+ struct Piece *head, *last, **tail;
+};
+
+static char as86_module[FILENAME_MAX];
+
+static struct Section stext, sdata;
+static unsigned long bsslen;
+static long bssindex;
+
+static struct SAA *syms;
+static unsigned long nsyms;
+
+static struct RAA *bsym;
+
+static struct SAA *strs;
+static unsigned long strslen;
+
+static int as86_reloc_size;
+
+static FILE *as86fp;
+static efunc error;
+
+static void as86_write(void);
+static void as86_write_section (struct Section *, int);
+static int as86_add_string (char *name);
+static void as86_sect_write(struct Section *, unsigned char *, unsigned long);
+
+/*
+ * Initialise the as86 back end: remember the output file and error
+ * handler, reset both sections and the BSS length, allocate segment
+ * indices for text, data and bss (in that order), and create empty
+ * symbol and string tables. The module name is the first string
+ * placed in the string table.
+ */
+static void as86_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    (void) ldef;                       /* placate optimisers */
+
+    as86fp = fp;
+    error = errfunc;
+
+    stext.data = saa_init(1L);
+    stext.head = stext.last = NULL;
+    stext.tail = &stext.head;
+    stext.len = stext.datalen = stext.size = 0;
+
+    sdata.data = saa_init(1L);
+    sdata.head = sdata.last = NULL;
+    sdata.tail = &sdata.head;
+    sdata.len = sdata.datalen = sdata.size = 0;
+
+    bsslen = 0;
+
+    stext.index = seg_alloc();
+    sdata.index = seg_alloc();
+    bssindex = seg_alloc();
+
+    syms = saa_init((long)sizeof(struct Symbol));
+    nsyms = 0;
+    bsym = raa_init();
+    strs = saa_init(1L);
+    strslen = 0;
+
+    as86_add_string (as86_module);
+}
+
+/*
+ * Finish the object file: write it out, close it, then release the
+ * section contents, both Piece lists, and the symbol/string tables.
+ */
+static void as86_cleanup(void) {
+    struct Piece *piece, *following;
+
+    as86_write();
+    fclose (as86fp);
+
+    saa_free (stext.data);
+    for (piece = stext.head; piece; piece = following) {
+        following = piece->next;
+        nasm_free (piece);
+    }
+    stext.head = NULL;
+
+    saa_free (sdata.data);
+    for (piece = sdata.head; piece; piece = following) {
+        following = piece->next;
+        nasm_free (piece);
+    }
+    sdata.head = NULL;
+
+    saa_free (syms);
+    raa_free (bsym);
+    saa_free (strs);
+}
+
+/*
+ * Map a section name to its segment index. A NULL name selects the
+ * default section (.text) and also sets the default of 16 bits.
+ * Names other than .text, .data and .bss yield NO_SEG.
+ */
+static long as86_section_names (char *name, int pass, int *bits) {
+    if (!name) {
+        *bits = 16;
+        return stext.index;
+    }
+
+    if (strcmp(name, ".text") == 0)
+        return stext.index;
+    if (strcmp(name, ".data") == 0)
+        return sdata.index;
+    if (strcmp(name, ".bss") == 0)
+        return bssindex;
+
+    return NO_SEG;
+}
+
+/*
+ * Append `name', including its terminating NUL, to the string table
+ * and return the position at which it was stored.
+ */
+static int as86_add_string (char *name) {
+    int where = strslen;
+    int namelen = strlen(name);
+
+    strslen += 1+namelen;
+    saa_wbytes (strs, name, (long)(namelen+1));
+
+    return where;
+}
+
+/*
+ * Record a label in the symbol table. Names beginning with ".." are
+ * ignored. The segment decides the symbol's section code and flags:
+ * NO_SEG gives an absolute symbol, the three known sections give their
+ * section codes, and anything else is treated as an import. Imports
+ * are also entered in `bsym' so that later references by segment
+ * number can find the symbol's index.
+ */
+static void as86_deflabel (char *name, long segment, long offset,
+                           int is_global) {
+    struct Symbol *sym;
+    int external = 0;
+
+    if (name[0] == '.' && name[1] == '.')
+        return;
+
+    sym = saa_wstruct (syms);
+
+    sym->strpos = as86_add_string (name);
+    sym->flags = 0;
+    sym->value = offset;
+
+    if (segment == NO_SEG) {
+        sym->flags |= SYM_ABSOLUTE;
+        sym->segment = 0;
+    } else if (segment == stext.index) {
+        sym->segment = SECT_TEXT;
+    } else if (segment == sdata.index) {
+        sym->segment = SECT_DATA;
+    } else if (segment == bssindex) {
+        sym->segment = SECT_BSS;
+    } else {
+        sym->flags |= SYM_IMPORT;
+        sym->segment = 15;
+        external = 1;
+    }
+
+    if (is_global == 2)
+        sym->segment = 3;              /* already have IMPORT */
+
+    if (is_global && !(sym->flags & SYM_IMPORT))
+        sym->flags |= SYM_EXPORT;
+
+    /*
+     * define the references from external-symbol segment numbers
+     * to these symbol records.
+     */
+    if (external)
+        bsym = raa_write (bsym, segment, nsyms);
+
+    nsyms++;
+}
+
+/*
+ * Account for `bytes' more bytes in `sect' and describe them with a
+ * Piece. An absolute piece (type 0) that directly follows another
+ * absolute piece is merged into it; otherwise a new Piece is appended.
+ * For type 1 the segment is translated to a section code, or, for a
+ * segment that is none of text/data/bss, to a symbol number looked up
+ * in `bsym' with the piece becoming type 2.
+ */
+static void as86_add_piece (struct Section *sect, int type, long offset,
+                            long segment, long bytes, int relative) {
+    struct Piece *piece;
+
+    sect->len += bytes;
+
+    if (type == 0 && sect->last && sect->last->type == 0) {
+        sect->last->bytes += bytes;
+        return;
+    }
+
+    piece = nasm_malloc(sizeof(struct Piece));
+    piece->next = NULL;
+    piece->type = type;
+    piece->offset = offset;
+    piece->bytes = bytes;
+    piece->relative = relative;
+
+    /* link the new piece onto the end of the section's list */
+    *sect->tail = piece;
+    sect->last = piece;
+    sect->tail = &piece->next;
+
+    if (type != 1)
+        return;
+
+    if (segment == stext.index) {
+        piece->number = SECT_TEXT;
+    } else if (segment == sdata.index) {
+        piece->number = SECT_DATA;
+    } else if (segment == bssindex) {
+        piece->number = SECT_BSS;
+    } else {
+        piece->number = raa_read (bsym, segment);
+        piece->type = 2;
+    }
+}
+
+/*
+ * Main output hook for the as86 format.
+ *
+ *   segto   - segment being assembled into (NO_SEG for [ABSOLUTE])
+ *   data    - raw bytes for OUT_RAWDATA; otherwise points at a long
+ *             holding the address/offset value
+ *   type    - OUT_* type combined with a byte count (OUT_SIZMASK)
+ *   segment - segment the value refers to, or NO_SEG for a constant
+ *   wrt     - WRT segment; not supported, reported and then ignored
+ *
+ * Raw and constant data are appended to the section contents and
+ * described by an absolute Piece; references to other segments become
+ * relocation Pieces. Anything directed at the BSS only adds to bsslen.
+ */
+static void as86_out (long segto, void *data, unsigned long type,
+                      long segment, long wrt) {
+    struct Section *s;
+    long realbytes = type & OUT_SIZMASK;
+    long offset;
+    unsigned char mydata[4], *p;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by as86 output format");
+    }
+
+    type &= OUT_TYPMASK;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if (type != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    if (segto == stext.index)
+        s = &stext;
+    else if (segto == sdata.index)
+        s = &sdata;
+    else if (segto == bssindex)
+        s = NULL;
+    else {
+        /* segto is a long, so it must be printed with %ld */
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %ld: defaulting to `.text'", segto);
+        s = &stext;
+    }
+
+    if (!s && type != OUT_RESERVE) {
+        error(ERR_WARNING, "attempt to initialise memory in the"
+              " BSS section: ignored");
+        if (type == OUT_REL2ADR)
+            realbytes = 2;
+        else if (type == OUT_REL4ADR)
+            realbytes = 4;
+        bsslen += realbytes;
+        return;
+    }
+
+    if (type == OUT_RESERVE) {
+        if (s) {
+            error(ERR_WARNING, "uninitialised space declared in"
+                  " %s section: zeroing",
+                  (segto == stext.index ? "code" : "data"));
+            as86_sect_write (s, NULL, realbytes);
+            as86_add_piece (s, 0, 0L, 0L, realbytes, 0);
+        } else
+            bsslen += realbytes;
+    } else if (type == OUT_RAWDATA) {
+        if (segment != NO_SEG)
+            error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
+        as86_sect_write (s, data, realbytes);
+        as86_add_piece (s, 0, 0L, 0L, realbytes, 0);
+    } else if (type == OUT_ADDRESS) {
+        if (segment != NO_SEG) {
+            if (segment % 2) {
+                error(ERR_NONFATAL, "as86 format does not support"
+                      " segment base references");
+            } else{
+                offset = * (long *) data;
+                as86_add_piece (s, 1, offset, segment, realbytes, 0);
+            }
+        } else {
+            /*
+             * A plain constant: serialise it into mydata in the
+             * target's byte order and write those bytes, rather than
+             * the host's in-memory representation of the long (which
+             * is only the same thing on a little-endian host).
+             */
+            p = mydata;
+            WRITELONG (p, * (long *) data);
+            as86_sect_write (s, mydata, realbytes);
+            as86_add_piece (s, 0, 0L, 0L, realbytes, 0);
+        }
+    } else if (type == OUT_REL2ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL2ADR");
+        /*
+         * NOTE(review): a relative reference with segment == NO_SEG
+         * emits nothing at all here - confirm that is intended.
+         */
+        if (segment != NO_SEG) {
+            if (segment % 2) {
+                error(ERR_NONFATAL, "as86 format does not support"
+                      " segment base references");
+            } else {
+                offset = * (long *) data;
+                as86_add_piece (s, 1, offset-realbytes+2, segment, 2L, 1);
+            }
+        }
+    } else if (type == OUT_REL4ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL4ADR");
+        if (segment != NO_SEG) {
+            if (segment % 2) {
+                error(ERR_NONFATAL, "as86 format does not support"
+                      " segment base references");
+            } else {
+                offset = * (long *) data;
+                as86_add_piece (s, 1, offset-realbytes+4, segment, 4L, 1);
+            }
+        }
+    }
+}
+
+/*
+ * Write the whole object file: the header, the symbol table, the
+ * string table, then the text and data section contents followed by
+ * a single zero terminator byte. Symbol records are modified in place
+ * (BSS symbols are rebased onto the end of the data section and the
+ * flags words are completed), so this is meant to run once.
+ */
+static void as86_write(void) {
+ int i;
+ long symlen, seglen, segsize;
+
+ /*
+ * First, go through the symbol records working out how big
+ * each will be. Also fix up BSS references at this time, and
+ * set the flags words up completely.
+ */
+ symlen = 0;
+ saa_rewind (syms);
+ for (i = 0; i < nsyms; i++) {
+ struct Symbol *sym = saa_rstruct (syms);
+ if (sym->segment == SECT_BSS)
+ sym->segment = SECT_DATA, sym->value += sdata.len;
+ sym->flags |= sym->segment;
+ /*
+ * Bits 14-15 of the flags select how the value is stored in the
+ * symbol table loop below: 0 = omitted, 1 = one byte, 2 = a
+ * short, 3 = a long. symlen grows by the two shorts of
+ * strpos/flags plus that value field.
+ */
+ if (sym->value == 0)
+ sym->flags |= 0 << 14, symlen += 4;
+ else if (sym->value >= 0 && sym->value <= 255)
+ sym->flags |= 1 << 14, symlen += 5;
+ else if (sym->value >= 0 && sym->value <= 65535)
+ sym->flags |= 2 << 14, symlen += 6;
+ else
+ sym->flags |= 3 << 14, symlen += 8;
+ }
+
+ /*
+ * Now do the same for the segments, and get the segment size
+ * descriptor word at the same time.
+ */
+ seglen = segsize = 0;
+ if ((unsigned long) stext.len > 65535)
+ segsize |= 0x03000000, seglen += 4;
+ else
+ segsize |= 0x02000000, seglen += 2;
+ if ((unsigned long) sdata.len > 65535)
+ segsize |= 0xC0000000, seglen += 4;
+ else
+ segsize |= 0x80000000, seglen += 2;
+
+ /*
+ * Emit the as86 header.
+ */
+ fwritelong (0x000186A3, as86fp);
+ fputc (0x2A, as86fp);
+ fwritelong (27+symlen+seglen+strslen, as86fp); /* header length */
+ fwritelong (stext.len+sdata.len, as86fp);
+ fwriteshort (strslen, as86fp);
+ fwriteshort (0, as86fp); /* class = revision = 0 */
+ fwritelong (0x55555555, as86fp); /* segment max sizes: always this */
+ fwritelong (segsize, as86fp); /* segment size descriptors */
+ /* the low bit of each descriptor is set only for the 4-byte form */
+ if (segsize & 0x01000000)
+ fwritelong (stext.len, as86fp);
+ else
+ fwriteshort (stext.len, as86fp);
+ if (segsize & 0x40000000)
+ fwritelong (sdata.len, as86fp);
+ else
+ fwriteshort (sdata.len, as86fp);
+ fwriteshort (nsyms, as86fp);
+
+ /*
+ * Write the symbol table.
+ */
+ saa_rewind (syms);
+ for (i = 0; i < nsyms; i++) {
+ struct Symbol *sym = saa_rstruct (syms);
+ fwriteshort (sym->strpos, as86fp);
+ fwriteshort (sym->flags, as86fp);
+ switch (sym->flags & (3<<14)) {
+ case 0<<14: break;
+ case 1<<14: fputc (sym->value, as86fp); break;
+ case 2<<14: fwriteshort (sym->value, as86fp); break;
+ case 3<<14: fwritelong (sym->value, as86fp); break;
+ }
+ }
+
+ /*
+ * Write out the string table.
+ */
+ saa_fpwrite (strs, as86fp);
+
+ /*
+ * Write the program text.
+ */
+ as86_reloc_size = -1; /* forces the first relocation to emit a size byte */
+ as86_write_section (&stext, SECT_TEXT);
+ as86_write_section (&sdata, SECT_DATA);
+ fputc (0, as86fp); /* termination */
+}
+
+/*
+ * Make `size' the current relocation size. If it differs from the
+ * size last selected, remember it and emit the byte that selects it;
+ * a size other than 1, 2 or 4 is reported as an internal error.
+ */
+static void as86_set_rsize (int size) {
+    if (as86_reloc_size == size)
+        return;                        /* already selected */
+
+    as86_reloc_size = size;
+
+    if (size == 1)
+        fputc (0x01, as86fp);          /* shouldn't happen */
+    else if (size == 2)
+        fputc (0x02, as86fp);
+    else if (size == 4)
+        fputc (0x03, as86fp);
+    else
+        error (ERR_PANIC, "bizarre relocation size %d", size);
+}
+
+/*
+ * Write one section's contents to the object file.
+ *
+ *   sect  - the section whose Piece list and data are to be emitted
+ *   index - the section code used to build the "select section" byte
+ *
+ * The Piece list is walked in order: absolute pieces are copied from
+ * the section data in chunks, segment relocations (type 1) and symbol
+ * relocations (type 2) are encoded from the Piece fields alone.
+ */
+static void as86_write_section (struct Section *sect, int index) {
+    struct Piece *p;
+    unsigned long s;
+    long length;
+
+    fputc (0x20+index, as86fp);        /* select the right section */
+
+    saa_rewind (sect->data);
+
+    for (p = sect->head; p; p = p->next)
+        switch (p->type) {
+          case 0:
+            /*
+             * Absolute data. Emit it in chunks of at most 64
+             * bytes. A chunk's count byte stores the length modulo
+             * 64, so a count of zero stands for a full 64 bytes; a
+             * zero-length piece must therefore emit no chunk at all,
+             * or the reader would expect 64 bytes that are not there.
+             */
+            length = p->bytes;
+            while (length > 0) {
+                char buf[64];
+                long tmplen = (length > 64 ? 64 : length);
+                fputc (0x40 | (tmplen & 0x3F), as86fp);
+                saa_rnbytes (sect->data, buf, tmplen);
+                fwrite (buf, 1, tmplen, as86fp);
+                length -= tmplen;
+            }
+            break;
+          case 1:
+            /*
+             * A segment-type relocation. First fix up the BSS.
+             */
+            if (p->number == SECT_BSS)
+                p->number = SECT_DATA, p->offset += sdata.len;
+            as86_set_rsize (p->bytes);
+            fputc (0x80 | (p->relative ? 0x20 : 0) | p->number, as86fp);
+            if (as86_reloc_size == 2)
+                fwriteshort (p->offset, as86fp);
+            else
+                fwritelong (p->offset, as86fp);
+            break;
+          case 2:
+            /*
+             * A symbol-type relocation. `s' becomes a code for the
+             * width of the offset field that follows the symbol
+             * number: 0 = none, 1 = byte, 2 = short, 3 = long.
+             */
+            as86_set_rsize (p->bytes);
+            s = p->offset;
+            if (s > 65535)
+                s = 3;
+            else if (s > 255)
+                s = 2;
+            else if (s > 0)
+                s = 1;
+            else
+                s = 0;
+            fputc (0xC0 |
+                   (p->relative ? 0x20 : 0) |
+                   (p->number > 255 ? 0x04 : 0) | s, as86fp);
+            if (p->number > 255)
+                fwriteshort (p->number, as86fp);
+            else
+                fputc (p->number, as86fp);
+            switch ((int)s) {
+              case 0: break;
+              case 1: fputc (p->offset, as86fp); break;
+              case 2: fwriteshort (p->offset, as86fp); break;
+              case 3: fwritelong (p->offset, as86fp); break;
+            }
+            break;
+        }
+}
+
+/*
+ * Append `len' bytes from `data' to a section's contents and add the
+ * same amount to the section's `datalen' counter.
+ */
+static void as86_sect_write (struct Section *sect,
+                             unsigned char *data, unsigned long len) {
+    sect->datalen += len;
+    saa_wbytes (sect->data, data, len);
+}
+
+/* Segment-base hook: every segment number is handed back unchanged. */
+static long as86_segbase (long segment) {
+    long base = segment;
+
+    return base;
+}
+
+/*
+ * Directive hook: this format implements no directives of its own, so
+ * the arguments are ignored and 0 is returned for every request.
+ */
+static int as86_directive (char *directive, char *value, int pass) {
+    (void) directive;
+    (void) value;
+    (void) pass;
+    return 0;
+}
+
+/*
+ * Set the module name to the input file name with everything from its
+ * last '.' onwards removed (or the whole name if it has no '.'), then
+ * derive the output file name using the ".o" extension.
+ */
+static void as86_filename (char *inname, char *outname, efunc error) {
+    char *dot = strrchr (inname, '.');
+
+    if (dot == NULL) {
+        strcpy (as86_module, inname);
+    } else {
+        strncpy (as86_module, inname, dot-inname);
+        as86_module[dot-inname] = '\0';
+    }
+
+    standard_extension (inname, outname, ".o", error);
+}
+
+/*
+ * Output-format descriptor for as86: a human-readable description,
+ * the short format name, and then the as86_* callbacks of this file.
+ */
+struct ofmt of_as86 = {
+ "Linux as86 (bin86 version 0.3) object files",
+ "as86",
+ as86_init,
+ as86_out,
+ as86_deflabel,
+ as86_section_names,
+ as86_segbase,
+ as86_directive,
+ as86_filename,
+ as86_cleanup
+};
+
+#endif /* OF_AS86 */
diff --git a/outbin.c b/outbin.c
new file mode 100644
index 0000000..82c8510
--- /dev/null
+++ b/outbin.c
@@ -0,0 +1,303 @@
+/* outbin.c output routines for the Netwide Assembler to produce
+ * flat-form binary files
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_BIN
+
+static FILE *fp;
+static efunc error;
+
+static struct Section {
+ struct SAA *contents;
+ long length;
+ long index;
+} textsect, datasect;
+static long bsslen, bssindex;
+
+static struct Reloc {
+ struct Reloc *next;
+ long posn;
+ long bytes;
+ long secref;
+ long secrel;
+ struct Section *target;
+} *relocs, **reloctail;
+
+static int start_point;
+
+/*
+ * Append a relocation record for the `bytes'-wide field about to be
+ * written at the current end of section `s'. `secref' is the segment
+ * whose base is later added to the field and `secrel' the segment
+ * whose base is later subtracted (see bin_cleanup()).
+ */
+static void add_reloc (struct Section *s, long bytes, long secref,
+                       long secrel) {
+    struct Reloc *rec = nasm_malloc(sizeof(struct Reloc));
+
+    rec->next = NULL;
+    rec->target = s;
+    rec->posn = s->length;
+    rec->bytes = bytes;
+    rec->secref = secref;
+    rec->secrel = secrel;
+
+    /* hook it onto the tail of the list */
+    *reloctail = rec;
+    reloctail = &rec->next;
+}
+
+/*
+ * Initialise the flat-binary back end: remember the output file and
+ * error handler, start with an origin of zero, create empty text and
+ * data sections, allocate segment indices for text, data and bss (in
+ * that order), and start with an empty relocation list.
+ */
+static void bin_init (FILE *afp, efunc errfunc, ldfunc ldef) {
+    (void) ldef;                       /* placate optimisers */
+
+    fp = afp;
+    error = errfunc;
+
+    start_point = 0;                   /* default */
+
+    textsect.contents = saa_init(1L);
+    datasect.contents = saa_init(1L);
+    textsect.length = 0;
+    datasect.length = 0;
+    bsslen = 0;
+
+    textsect.index = seg_alloc();
+    datasect.index = seg_alloc();
+    bssindex = seg_alloc();
+
+    relocs = NULL;
+    reloctail = &relocs;
+}
+
+/*
+ * Finish the binary file. Now that the section lengths are final,
+ * work out where data and bss start, patch every recorded relocation
+ * in the stored section contents, write text (plus alignment padding
+ * and data if there is any data), close the file and free everything.
+ */
+static void bin_cleanup (void) {
+ struct Reloc *r;
+ long datapos, dataalign, bsspos;
+
+ datapos = (start_point + textsect.length + 3) & ~3;/* align on 4 bytes */
+ dataalign = datapos - (start_point + textsect.length);
+
+ saa_rewind (textsect.contents);
+ saa_rewind (datasect.contents);
+
+ bsspos = (datapos + datasect.length + 3) & ~3;
+
+ for (r = relocs; r; r = r->next) {
+ unsigned char *p, *q, mydata[4];
+ long l;
+
+ /* fetch the stored field and decode it, low byte first */
+ saa_fread (r->target->contents, r->posn, mydata, r->bytes);
+ p = q = mydata;
+ l = *p++;
+ l += ((long)*p++) << 8;
+ if (r->bytes == 4) {
+ l += ((long)*p++) << 16;
+ l += ((long)*p++) << 24;
+ }
+
+ /* add the base of the section being referred to, if it is ours */
+ if (r->secref == textsect.index)
+ l += start_point;
+ else if (r->secref == datasect.index)
+ l += datapos;
+ else if (r->secref == bssindex)
+ l += bsspos;
+
+ /* subtract the base of the section the field is relative to */
+ if (r->secrel == textsect.index)
+ l -= start_point;
+ else if (r->secrel == datasect.index)
+ l -= datapos;
+ else if (r->secrel == bssindex)
+ l -= bsspos;
+
+ /* re-encode and store the patched value back in place */
+ if (r->bytes == 4)
+ WRITELONG(q, l);
+ else
+ WRITESHORT(q, l);
+ saa_fwrite (r->target->contents, r->posn, mydata, r->bytes);
+ }
+ saa_fpwrite (textsect.contents, fp);
+ if (datasect.length > 0) {
+ /* dataalign is at most 3, so the four-zero literal is long enough */
+ fwrite ("\0\0\0\0", dataalign, 1, fp);
+ saa_fpwrite (datasect.contents, fp);
+ }
+ fclose (fp);
+ saa_free (textsect.contents);
+ saa_free (datasect.contents);
+ while (relocs) {
+ r = relocs->next;
+ nasm_free (relocs);
+ relocs = r;
+ }
+}
+
+/*
+ * Main output hook for the flat-binary format.
+ *
+ *   segto   - segment being assembled into (NO_SEG for [ABSOLUTE])
+ *   data    - raw bytes for OUT_RAWDATA; otherwise points at a long
+ *             holding the address/offset value
+ *   type    - OUT_* type combined with a byte count (OUT_SIZMASK)
+ *   segment - segment the value refers to, or NO_SEG for a constant
+ *   wrt     - WRT segment; not supported, reported and then ignored
+ *
+ * Output for text and data is appended to the section contents, with
+ * a relocation recorded for each field that depends on a section base
+ * (resolved in bin_cleanup()). Output for the BSS only adds to bsslen.
+ */
+static void bin_out (long segto, void *data, unsigned long type,
+                     long segment, long wrt) {
+    unsigned char *p, mydata[4];
+    struct Section *s;
+    long realbytes;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by binary output format");
+    }
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if ((type & OUT_TYPMASK) != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    if (segto == bssindex) {           /* BSS */
+        if ((type & OUT_TYPMASK) != OUT_RESERVE)
+            error(ERR_WARNING, "attempt to initialise memory in the"
+                  " BSS section: ignored");
+        s = NULL;
+    } else if (segto == textsect.index) {
+        s = &textsect;
+    } else if (segto == datasect.index) {
+        s = &datasect;
+    } else {
+        /* segto is a long, so it must be printed with %ld */
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %ld: defaulting to `.text'", segto);
+        s = &textsect;
+    }
+
+    if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
+        if (segment != NO_SEG &&
+            segment != textsect.index &&
+            segment != datasect.index &&
+            segment != bssindex) {
+            if (segment % 2)
+                error(ERR_NONFATAL, "binary output format does not support"
+                      " segment base references");
+            else
+                error(ERR_NONFATAL, "binary output format does not support"
+                      " external references");
+            segment = NO_SEG;
+        }
+        if (s) {
+            if (segment != NO_SEG)
+                add_reloc (s, type & OUT_SIZMASK, segment, -1L);
+            p = mydata;
+            if ((type & OUT_SIZMASK) == 4)
+                WRITELONG (p, *(long *)data);
+            else
+                WRITESHORT (p, *(long *)data);
+            saa_wbytes (s->contents, mydata, type & OUT_SIZMASK);
+            s->length += type & OUT_SIZMASK;
+        } else
+            bsslen += type & OUT_SIZMASK;
+    } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
+        type &= OUT_SIZMASK;
+        if (s) {
+            saa_wbytes (s->contents, data, type);
+            s->length += type;
+        } else
+            bsslen += type;
+    } else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
+        if (s) {
+            error(ERR_WARNING, "uninitialised space declared in"
+                  " %s section: zeroing",
+                  (segto == textsect.index ? "code" : "data"));
+        }
+        type &= OUT_SIZMASK;
+        if (s) {
+            saa_wbytes (s->contents, NULL, type);
+            s->length += type;
+        } else
+            bsslen += type;
+    } else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
+               (type & OUT_TYPMASK) == OUT_REL4ADR) {
+        realbytes = ((type & OUT_TYPMASK) == OUT_REL4ADR ? 4 : 2);
+        if (segment != NO_SEG &&
+            segment != textsect.index &&
+            segment != datasect.index &&
+            segment != bssindex) {
+            if (segment % 2)
+                error(ERR_NONFATAL, "binary output format does not support"
+                      " segment base references");
+            else
+                error(ERR_NONFATAL, "binary output format does not support"
+                      " external references");
+            segment = NO_SEG;
+        }
+        if (s) {
+            /*
+             * The stored value is relative to the end of this field;
+             * the relocation later adds the target section's base and
+             * subtracts the base of the section we are in.
+             */
+            add_reloc (s, realbytes, segment, segto);
+            p = mydata;
+            if (realbytes == 4)
+                WRITELONG (p, *(long*)data - realbytes - s->length);
+            else
+                WRITESHORT (p, *(long*)data - realbytes - s->length);
+            saa_wbytes (s->contents, mydata, realbytes);
+            s->length += realbytes;
+        } else
+            bsslen += realbytes;
+    }
+}
+
+/*
+ * Label hook: the binary format keeps no symbol table, so the only
+ * thing done here is to reject common variables (is_global == 2).
+ */
+static void bin_deflabel (char *name, long segment, long offset,
+                          int is_global) {
+    if (is_global != 2)
+        return;
+
+    error (ERR_NONFATAL, "binary output format does not support common"
+           " variables");
+}
+
+/*
+ * Map a section name to its segment index. A NULL name selects the
+ * default section (.text) and also sets the default of 16 bits.
+ * Names other than .text, .data and .bss yield NO_SEG.
+ */
+static long bin_secname (char *name, int pass, int *bits) {
+    if (!name) {
+        *bits = 16;
+        return textsect.index;
+    }
+
+    if (strcmp(name, ".text") == 0)
+        return textsect.index;
+    if (strcmp(name, ".data") == 0)
+        return datasect.index;
+    if (strcmp(name, ".bss") == 0)
+        return bssindex;
+
+    return NO_SEG;
+}
+
+/* Segment-base hook: every segment number is handed back unchanged. */
+static long bin_segbase (long segment) {
+    long base = segment;
+
+    return base;
+}
+
+/*
+ * Directive hook. The only directive handled is "org", whose value
+ * becomes the new start point; a value that readnum() flags as bad is
+ * reported but the directive still counts as handled. Returns 1 for
+ * "org" and 0 for anything else.
+ */
+static int bin_directive (char *directive, char *value, int pass) {
+    int bad_number;
+
+    if (strcmp(directive, "org") != 0)
+        return 0;
+
+    start_point = readnum (value, &bad_number);
+    if (bad_number)
+        error (ERR_NONFATAL, "argument to ORG should be numeric");
+
+    return 1;
+}
+
+/*
+ * Derive the output file name from the input file name by handing
+ * both, plus an empty extension, to standard_extension().
+ */
+static void bin_filename (char *inname, char *outname, efunc error) {
+    char *extension = "";
+
+    standard_extension (inname, outname, extension, error);
+}
+
+/*
+ * Output-format descriptor for flat binaries: a human-readable
+ * description, the short format name, and then the callbacks defined
+ * in this file.
+ */
+struct ofmt of_bin = {
+ "flat-form binary files (e.g. DOS .COM, .SYS)",
+ "bin",
+ bin_init,
+ bin_out,
+ bin_deflabel,
+ bin_secname,
+ bin_segbase,
+ bin_directive,
+ bin_filename,
+ bin_cleanup
+};
+
+#endif /* OF_BIN */
diff --git a/outcoff.c b/outcoff.c
new file mode 100644
index 0000000..c3ae712
--- /dev/null
+++ b/outcoff.c
@@ -0,0 +1,611 @@
+/* outcoff.c output routines for the Netwide Assembler to produce
+ * COFF object files (for DJGPP and Win32)
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#if defined(OF_COFF) || defined(OF_WIN32)
+
+/*
+ * Notes on COFF:
+ *
+ * (0) When I say `standard COFF' below, I mean `COFF as output and
+ * used by DJGPP'. I assume DJGPP gets it right.
+ *
+ * (1) Win32 appears to interpret the term `relative relocation'
+ * differently from standard COFF. Standard COFF understands a
+ * relative relocation to mean that during relocation you add the
+ * address of the symbol you're referencing, and subtract the base
+ * address of the section you're in. Win32 COFF, by contrast, seems
+ * to add the address of the symbol and then subtract the address
+ * of THE BYTE AFTER THE RELOCATED DWORD. Hence the two formats are
+ * subtly incompatible.
+ *
+ * (2) Win32 doesn't bother putting any flags in the header flags
+ * field (at offset 0x12 into the file).
+ *
+ * (3) Win32 puts some weird flags into the section header table.
+ * It uses flags 0x80000000 (writable), 0x40000000 (readable) and
+ * 0x20000000 (executable) in the expected combinations, which
+ * standard COFF doesn't seem to bother with, but it also does
+ * something else strange: it also flags code sections as
+ * 0x00500000 and data/bss as 0x00300000. Even Microsoft's
+ * documentation doesn't explain what these things mean. I just go
+ * ahead and use them anyway - it seems to work.
+ *
+ * (4) Both standard COFF and Win32 COFF seem to use the DWORD
+ * field directly after the section name in the section header
+ * table for something strange: they store what the address of the
+ * section start point _would_ be, if you laid all the sections end
+ * to end starting at zero. Dunno why. Microsoft's documentation
+ * lists this field as "Virtual Size of Section", which doesn't
+ * seem to fit at all. In fact, Win32 even includes non-linked
+ * sections such as .drectve in this calculation. Not that I can be
+ * bothered with those things anyway.
+ *
+ * (5) Standard COFF does something very strange to common
+ * variables: the relocation point for a common variable is as far
+ * _before_ the variable as its size stretches out _after_ it. So
+ * we must fix up common variable references. Win32 seems to be
+ * sensible on this one.
+ */
+
+/* Flag which version of COFF we are currently outputting. */
+static int win32;
+
+struct Reloc {
+ struct Reloc *next;
+ long address; /* relative to _start_ of section */
+ long symbol; /* symbol number */
+ int relative; /* TRUE or FALSE */
+};
+
+struct Symbol {
+ char name[9];
+ long strpos; /* string table position of name */
+ int section; /* section number where it's defined
+ * - in COFF codes, not NASM codes */
+ int is_global; /* is it a global symbol or not? */
+ long value; /* address, or COMMON variable size */
+};
+
+static FILE *coffp;
+static efunc error;
+static char coff_infile[FILENAME_MAX];
+
+struct Section {
+ struct SAA *data;
+ unsigned long len;
+ int nrelocs;
+ long index;
+ struct Reloc *head, **tail;
+};
+
+static struct Section stext, sdata;
+static unsigned long bsslen;
+static long bssindex;
+
+static struct SAA *syms;
+static unsigned long nsyms;
+
+static struct RAA *bsym, *symval;
+
+static struct SAA *strs;
+static unsigned long strslen;
+
+/*
+ * The symbol table contains a double entry for the file name, a
+ * double entry for each of the three sections, and an absolute
+ * symbol referencing address zero, followed by the _real_ symbols.
+ * That's nine extra symbols.
+ */
+#define SYM_INITIAL 9
+
+/*
+ * Symbol table indices we can relocate relative to.
+ */
+#define SYM_ABS_SEG 8
+#define SYM_TEXT_SEG 2
+#define SYM_DATA_SEG 4
+#define SYM_BSS_SEG 6
+
+/*
+ * The section header table ends at this offset: 0x14 for the
+ * header, plus 0x28 for each of three sections.
+ */
+#define COFF_HDRS_END 0x8c
+
+static void coff_gen_init(FILE *, efunc);
+static void coff_sect_write (struct Section *, unsigned char *,
+ unsigned long);
+static void coff_write (void);
+static void coff_section_header (char *, long, long, long, long, int, long);
+static void coff_write_relocs (struct Section *);
+static void coff_write_symbols (void);
+
+/* Initialise the back end for the Win32 flavour of COFF. */
+static void coff_win32_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    (void) ldef;                       /* placate optimisers */
+
+    win32 = TRUE;
+    coff_gen_init(fp, errfunc);
+}
+
+/* Initialise the back end for the standard flavour of COFF. */
+static void coff_std_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    (void) ldef;                       /* placate optimisers */
+
+    win32 = FALSE;
+    coff_gen_init(fp, errfunc);
+}
+
+/*
+ * Initialisation shared by both COFF flavours: remember the output
+ * file and error handler, create empty text and data sections with
+ * empty relocation lists, allocate segment indices for text, data and
+ * bss (in that order), and create empty symbol and string tables.
+ */
+static void coff_gen_init(FILE *fp, efunc errfunc) {
+    coffp = fp;
+    error = errfunc;
+
+    stext.data = saa_init(1L);
+    stext.head = NULL;
+    stext.tail = &stext.head;
+    stext.len = 0;
+    stext.nrelocs = 0;
+
+    sdata.data = saa_init(1L);
+    sdata.head = NULL;
+    sdata.tail = &sdata.head;
+    sdata.len = 0;
+    sdata.nrelocs = 0;
+
+    bsslen = 0;
+
+    stext.index = seg_alloc();
+    sdata.index = seg_alloc();
+    bssindex = seg_alloc();
+
+    syms = saa_init((long)sizeof(struct Symbol));
+    nsyms = 0;
+    bsym = raa_init();
+    symval = raa_init();
+    strs = saa_init(1L);
+    strslen = 0;
+}
+
+/*
+ * Finish the object file: write it out, close it, then release the
+ * section contents, both relocation lists, and the symbol, lookup and
+ * string tables.
+ */
+static void coff_cleanup(void) {
+    struct Reloc *rel, *following;
+
+    coff_write();
+    fclose (coffp);
+
+    saa_free (stext.data);
+    for (rel = stext.head; rel; rel = following) {
+        following = rel->next;
+        nasm_free (rel);
+    }
+    stext.head = NULL;
+
+    saa_free (sdata.data);
+    for (rel = sdata.head; rel; rel = following) {
+        following = rel->next;
+        nasm_free (rel);
+    }
+    sdata.head = NULL;
+
+    saa_free (syms);
+    raa_free (bsym);
+    raa_free (symval);
+    saa_free (strs);
+}
+
+/*
+ * Map a section name to its segment index. A NULL name selects the
+ * default section (.text) and also sets the default of 32 bits.
+ * Names other than .text, .data and .bss yield NO_SEG.
+ */
+static long coff_section_names (char *name, int pass, int *bits) {
+    if (!name) {
+        *bits = 32;
+        return stext.index;
+    }
+
+    if (strcmp(name, ".text") == 0)
+        return stext.index;
+    if (strcmp(name, ".data") == 0)
+        return sdata.index;
+    if (strcmp(name, ".bss") == 0)
+        return bssindex;
+
+    return NO_SEG;
+}
+
+/*
+ * Record a label in the symbol table. Names beginning with ".." are
+ * ignored. A name longer than eight characters goes into the string
+ * table and is referred to by position; a shorter one is stored in
+ * the symbol record itself, with strpos set to -1. A label in a
+ * segment that is none of text/data/bss is an undefined (external)
+ * symbol, forced global, and entered in `bsym' so relocations can
+ * find it by segment number.
+ */
+static void coff_deflabel (char *name, long segment, long offset,
+                           int is_global) {
+    struct Symbol *sym;
+    size_t namelen;
+    int where;
+    int external = 0;
+
+    if (name[0] == '.' && name[1] == '.')
+        return;
+
+    namelen = strlen(name);
+    if (namelen > 8) {
+        where = strslen+4;
+        saa_wbytes (strs, name, (long)(1+namelen));
+        strslen += 1+namelen;
+    } else
+        where = -1;
+
+    sym = saa_wstruct (syms);
+
+    sym->strpos = where;
+    if (where == -1)
+        strcpy (sym->name, name);
+    sym->is_global = !!is_global;
+
+    if (segment == NO_SEG) {
+        sym->section = -1;             /* absolute symbol */
+    } else if (segment == stext.index) {
+        sym->section = 1;              /* .text */
+    } else if (segment == sdata.index) {
+        sym->section = 2;              /* .data */
+    } else if (segment == bssindex) {
+        sym->section = 3;              /* .bss */
+    } else {
+        sym->section = 0;              /* undefined */
+        sym->is_global = TRUE;
+        external = 1;
+    }
+
+    /* a common variable keeps its size; other undefined symbols get 0 */
+    if (is_global == 2 || sym->section != 0)
+        sym->value = offset;
+    else
+        sym->value = 0;
+
+    /*
+     * define the references from external-symbol segment numbers
+     * to these symbol records.
+     */
+    if (external)
+        bsym = raa_write (bsym, segment, nsyms);
+
+    if (segment != NO_SEG)
+        symval = raa_write (symval, segment, sym->section ? 0 : sym->value);
+
+    nsyms++;
+}
+
+/*
+ * Append a relocation at the current end of `sect' referring to
+ * `segment', which is translated to a symbol-table index: one of the
+ * fixed section/absolute symbols, or an external symbol looked up in
+ * `bsym' and offset by SYM_INITIAL.
+ *
+ * Returns the fixup to add to the stored value: for standard COFF and
+ * a reference to a real (non-initial) symbol this is the entry held
+ * in `symval' for the segment; otherwise it is zero.
+ */
+static long coff_add_reloc (struct Section *sect, long segment,
+                            int relative) {
+    struct Reloc *rel = nasm_malloc(sizeof(struct Reloc));
+
+    rel->next = NULL;
+    rel->address = sect->len;
+    rel->relative = relative;
+
+    if (segment == NO_SEG)
+        rel->symbol = SYM_ABS_SEG;
+    else if (segment == stext.index)
+        rel->symbol = SYM_TEXT_SEG;
+    else if (segment == sdata.index)
+        rel->symbol = SYM_DATA_SEG;
+    else if (segment == bssindex)
+        rel->symbol = SYM_BSS_SEG;
+    else
+        rel->symbol = raa_read (bsym, segment) + SYM_INITIAL;
+
+    /* hook it onto the tail of the section's list */
+    *sect->tail = rel;
+    sect->tail = &rel->next;
+    sect->nrelocs++;
+
+    /*
+     * Return the fixup for standard COFF common variables.
+     */
+    if (win32 || rel->symbol < SYM_INITIAL)
+        return 0;
+    return raa_read (symval, segment);
+}
+
+/*
+ * Main output hook for both COFF flavours.
+ *
+ *   segto   - segment being assembled into (NO_SEG for [ABSOLUTE])
+ *   data    - raw bytes for OUT_RAWDATA; otherwise points at a long
+ *             holding the address/offset value
+ *   type    - OUT_* type combined with a byte count (OUT_SIZMASK)
+ *   segment - segment the value refers to, or NO_SEG for a constant
+ *   wrt     - WRT segment; not supported, reported and then ignored
+ *
+ * Output for text and data is appended to the section contents, with
+ * a relocation recorded for each reference to a segment. Output for
+ * the BSS only adds to bsslen. 16-bit relocations are rejected.
+ */
+static void coff_out (long segto, void *data, unsigned long type,
+                      long segment, long wrt) {
+    struct Section *s;
+    long realbytes = type & OUT_SIZMASK;
+    unsigned char mydata[4], *p;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by COFF output formats");
+    }
+
+    type &= OUT_TYPMASK;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if (type != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    if (segto == stext.index)
+        s = &stext;
+    else if (segto == sdata.index)
+        s = &sdata;
+    else if (segto == bssindex)
+        s = NULL;
+    else {
+        /* segto is a long, so it must be printed with %ld */
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %ld: defaulting to `.text'", segto);
+        s = &stext;
+    }
+
+    if (!s && type != OUT_RESERVE) {
+        error(ERR_WARNING, "attempt to initialise memory in the"
+              " BSS section: ignored");
+        if (type == OUT_REL2ADR)
+            realbytes = 2;
+        else if (type == OUT_REL4ADR)
+            realbytes = 4;
+        bsslen += realbytes;
+        return;
+    }
+
+    if (type == OUT_RESERVE) {
+        if (s) {
+            error(ERR_WARNING, "uninitialised space declared in"
+                  " %s section: zeroing",
+                  (segto == stext.index ? "code" : "data"));
+            coff_sect_write (s, NULL, realbytes);
+        } else
+            bsslen += realbytes;
+    } else if (type == OUT_RAWDATA) {
+        if (segment != NO_SEG)
+            error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
+        coff_sect_write (s, data, realbytes);
+    } else if (type == OUT_ADDRESS) {
+        /* wrt was forced to NO_SEG above, so the wrt tests never fire */
+        if (realbytes == 2 && (segment != NO_SEG || wrt != NO_SEG))
+            error(ERR_NONFATAL, "COFF format does not support 16-bit"
+                  " relocations");
+        else {
+            long fix = 0;
+            if (segment != NO_SEG || wrt != NO_SEG) {
+                if (wrt != NO_SEG) {
+                    error(ERR_NONFATAL, "COFF format does not support"
+                          " WRT types");
+                } else if (segment % 2) {
+                    error(ERR_NONFATAL, "COFF format does not support"
+                          " segment base references");
+                } else
+                    fix = coff_add_reloc (s, segment, FALSE);
+            }
+            p = mydata;
+            WRITELONG (p, *(long *)data + fix);
+            coff_sect_write (s, mydata, realbytes);
+        }
+    } else if (type == OUT_REL2ADR) {
+        error(ERR_NONFATAL, "COFF format does not support 16-bit"
+              " relocations");
+    } else if (type == OUT_REL4ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL4ADR");
+        else if (segment == NO_SEG && win32)
+            error(ERR_NONFATAL, "Win32 COFF does not correctly support"
+                  " relative references to absolute addresses");
+        else {
+            long fix = 0;
+            if (segment != NO_SEG && segment % 2) {
+                error(ERR_NONFATAL, "COFF format does not support"
+                      " segment base references");
+            } else
+                fix = coff_add_reloc (s, segment, TRUE);
+            p = mydata;
+            /*
+             * The two flavours store different values for a relative
+             * relocation: Win32 adjusts only for the bytes following
+             * the field, standard COFF also subtracts the field's
+             * position within the section.
+             */
+            if (win32) {
+                WRITELONG (p, *(long*)data + 4 - realbytes + fix);
+            } else {
+                WRITELONG (p, *(long*)data-(realbytes + s->len) + fix);
+            }
+            coff_sect_write (s, mydata, 4L);
+        }
+    }
+}
+
+/*
+ * Append `len' bytes from `data' to a section's contents and advance
+ * the section's running length by the same amount.
+ */
+static void coff_sect_write (struct Section *sect,
+                             unsigned char *data, unsigned long len) {
+    sect->len += len;
+    saa_wbytes (sect->data, data, len);
+}
+
+/*
+ * Directive hook for the COFF drivers.  No directive is acted upon:
+ * the arguments are ignored and 0 is returned every time (presumably
+ * "not recognised" -- confirm against the caller).
+ */
+static int coff_directives (char *directive, char *value, int pass)
+{
+    (void) directive;
+    (void) value;
+    (void) pass;
+
+    return 0;
+}
+
+/*
+ * Write the whole object file to coffp.  In file order: file header,
+ * three section headers, .text bytes, .text relocations, .data bytes,
+ * .data relocations, symbol table, string table.
+ */
+static void coff_write (void) {
+    long text_at, textrel_at, data_at, datarel_at, syms_at;
+    long text_flags, data_flags, bss_flags;
+
+    /*
+     * File offsets of each part.  Section contents begin at
+     * COFF_HDRS_END; each relocation record takes 10 bytes
+     * (two longs and a short, see coff_write_relocs).
+     */
+    text_at = COFF_HDRS_END;
+    textrel_at = text_at + stext.len;
+    data_at = textrel_at + stext.nrelocs * 10;
+    datarel_at = data_at + sdata.len;
+    syms_at = datarel_at + sdata.nrelocs * 10;
+
+    /* Section flag words differ between plain COFF and Win32. */
+    text_flags = (win32 ? 0x60500020L : 0x20L);
+    data_flags = (win32 ? 0xC0300040L : 0x40L);
+    bss_flags = (win32 ? 0xC0300080L : 0x80L);
+
+    /*
+     * File header.
+     */
+    fwriteshort (0x14C, coffp);            /* MACHINE_i386 */
+    fwriteshort (3, coffp);                /* number of sections */
+    fwritelong (time(NULL), coffp);        /* time stamp */
+    fwritelong (syms_at, coffp);           /* where the symbol table is */
+    fwritelong (nsyms + SYM_INITIAL, coffp);   /* how many symbols */
+    fwriteshort (0, coffp);                /* no optional header */
+    /* Flags: 32-bit, no line numbers. Win32 doesn't even bother with them. */
+    fwriteshort (win32 ? 0 : 0x104, coffp);
+
+    /*
+     * Section headers: .text, .data, .bss.  The second argument is
+     * the running total of the sizes of the sections before it.
+     */
+    coff_section_header (".text", 0L, stext.len, text_at,
+                         textrel_at, stext.nrelocs, text_flags);
+    coff_section_header (".data", stext.len, sdata.len, data_at,
+                         datarel_at, sdata.nrelocs, data_flags);
+    coff_section_header (".bss", stext.len+sdata.len, bsslen, 0L, 0L, 0,
+                         bss_flags);
+
+    /*
+     * Text contents, then text relocations.
+     */
+    saa_fpwrite (stext.data, coffp);
+    coff_write_relocs (&stext);
+
+    /*
+     * Data contents, then data relocations.
+     */
+    saa_fpwrite (sdata.data, coffp);
+    coff_write_relocs (&sdata);
+
+    /*
+     * Symbol table followed by the string table.
+     */
+    coff_write_symbols();
+    fwritelong (strslen+4, coffp);         /* length includes length count */
+    saa_fpwrite (strs, coffp);
+}
+
+/*
+ * Write one 40-byte section header to coffp.
+ *
+ *   name    - section name; stored in an 8-byte field, NUL-padded and
+ *             cut off at 8 characters
+ *   vsize   - written as the first long after the name
+ *   datalen - size of the section contents
+ *   datapos - file offset of the section contents
+ *   relpos  - file offset of the section's relocation records
+ *   nrelocs - number of relocation records
+ *   flags   - section flag word
+ *
+ * The address field and both line-number fields are always zero.
+ */
+static void coff_section_header (char *name, long vsize,
+                                 long datalen, long datapos,
+                                 long relpos, int nrelocs, long flags)
+{
+    char field[8];
+
+    /* strncpy pads the remainder of the field with NULs by itself */
+    strncpy (field, name, sizeof(field));
+    fwrite (field, sizeof(field), 1, coffp);
+
+    fwritelong (vsize, coffp);
+    fwritelong (0L, coffp);        /* RVA/offset - we ignore */
+    fwritelong (datalen, coffp);
+    fwritelong (datapos, coffp);
+    fwritelong (relpos, coffp);
+    fwritelong (0L, coffp);        /* no line numbers - we don't do 'em */
+
+    fwriteshort (nrelocs, coffp);
+    fwriteshort (0, coffp);        /* again, no line numbers */
+
+    fwritelong (flags, coffp);
+}
+
+/*
+ * Write every relocation queued on section `s' to coffp, in list
+ * order.  Each record is address (long), symbol (long), type (short).
+ */
+static void coff_write_relocs (struct Section *s) {
+    struct Reloc *rel = s->head;
+
+    while (rel) {
+        int reltype;
+
+        /*
+         * Strange: Microsoft's COFF documentation says 0x03 for an
+         * absolute relocation, but both Visual C++ and DJGPP agree
+         * that in fact it's 0x06. I'll use 0x06 until someone
+         * argues.
+         */
+        if (rel->relative)
+            reltype = 0x14;
+        else
+            reltype = 0x06;
+
+        fwritelong (rel->address, coffp);
+        fwritelong (rel->symbol, coffp);
+        fwriteshort (reltype, coffp);
+
+        rel = rel->next;
+    }
+}
+
+/*
+ * Write one 18-byte symbol record to coffp.
+ *
+ *   name    - if non-NULL, stored inline in the 8-byte name field
+ *             (NUL-padded, cut off at 8 characters); if NULL the name
+ *             field holds a zero long followed by `strpos'
+ *   strpos  - string-table position, used only when `name' is NULL
+ *   value   - symbol value
+ *   section - section number
+ *   type    - written as a single byte after a zero short
+ *   aux     - written as the final byte of the record
+ */
+static void coff_symbol (char *name, long strpos, long value,
+                         int section, int type, int aux)
+{
+    if (!name) {
+        fwritelong (0L, coffp);
+        fwritelong (strpos, coffp);
+    } else {
+        char field[8];
+
+        /* strncpy NUL-fills whatever the name leaves unused */
+        strncpy (field, name, sizeof(field));
+        fwrite (field, sizeof(field), 1, coffp);
+    }
+
+    fwritelong (value, coffp);
+    fwriteshort (section, coffp);
+    fwriteshort (0, coffp);
+    fputc (type, coffp);
+    fputc (aux, coffp);
+}
+
+/*
+ * Write the symbol table to coffp: the `.file' record with its
+ * file-name auxiliary, one record plus auxiliary for each of .text,
+ * .data and .bss, the `.absolut' record, and then every symbol
+ * collected in `syms'.
+ */
+static void coff_write_symbols (void) {
+    char fname[18];
+    char zeros[18];
+    int i;
+
+    memset (zeros, 0, sizeof(zeros));      /* padding for auxiliaries */
+
+    /*
+     * `.file' and its 18-byte auxiliary holding the input file name
+     * (strncpy NUL-pads a short name and cuts off a long one).
+     */
+    coff_symbol (".file", 0L, 0L, -2, 0x67, 1);
+    strncpy (fname, coff_infile, sizeof(fname));
+    fwrite (fname, sizeof(fname), 1, coffp);
+
+    /*
+     * Section symbols.  Each auxiliary is padded with zeros up to
+     * 18 bytes.
+     */
+    coff_symbol (".text", 0L, 0L, 1, 3, 1);
+    fwritelong (stext.len, coffp);
+    fwriteshort (stext.nrelocs, coffp);
+    fwrite (zeros, 12, 1, coffp);
+
+    coff_symbol (".data", 0L, 0L, 2, 3, 1);
+    fwritelong (sdata.len, coffp);
+    fwriteshort (sdata.nrelocs, coffp);
+    fwrite (zeros, 12, 1, coffp);
+
+    coff_symbol (".bss", 0L, 0L, 3, 3, 1);
+    fwritelong (bsslen, coffp);
+    fwrite (zeros, 14, 1, coffp);
+
+    /*
+     * The absolute symbol, for relative-to-absolute relocations.
+     */
+    coff_symbol (".absolut", 0L, 0L, -1, 3, 0);
+
+    /*
+     * The symbols collected during assembly.  A string position of -1
+     * selects the inline name; global symbols are written with type
+     * byte 2, all others with 3.
+     */
+    saa_rewind (syms);
+    for (i = 0; i < nsyms; i++) {
+        struct Symbol *sym = saa_rstruct (syms);
+        char *inline_name = NULL;
+        int kind = 3;
+
+        if (sym->strpos == -1)
+            inline_name = sym->name;
+        if (sym->is_global)
+            kind = 2;
+
+        coff_symbol (inline_name, sym->strpos, sym->value,
+                     sym->section, kind, 0);
+    }
+}
+
+/*
+ * Segment-base hook: the COFF drivers hand the segment id back
+ * unchanged.
+ */
+static long coff_segbase (long segment)
+{
+    long base = segment;
+
+    return base;
+}
+
+/*
+ * Filename hook for the plain COFF driver: remember the input file
+ * name in coff_infile and derive the output name with a ".o"
+ * extension.
+ * NOTE(review): the copy is unbounded; the size of coff_infile is not
+ * visible from here -- confirm it can hold any input name.
+ */
+static void coff_std_filename (char *inname, char *outname, efunc error)
+{
+    char *dest = coff_infile;
+
+    strcpy (dest, inname);
+    standard_extension (inname, outname, ".o", error);
+}
+
+/*
+ * Filename hook for the Win32 driver: remember the input file name in
+ * coff_infile and derive the output name with a ".obj" extension.
+ * NOTE(review): the copy is unbounded; the size of coff_infile is not
+ * visible from here -- confirm it can hold any input name.
+ */
+static void coff_win32_filename (char *inname, char *outname, efunc error)
+{
+    char *dest = coff_infile;
+
+    strcpy (dest, inname);
+    standard_extension (inname, outname, ".obj", error);
+}
+
+#endif /* defined(OF_COFF) || defined(OF_WIN32) */
+
+#ifdef OF_COFF
+
+/*
+ * Driver descriptor for plain COFF output.  Entries, in order: the
+ * full description, the short format name, then the init, output,
+ * label-definition, section-name, segment-base, directive, filename
+ * and cleanup handlers.  Only the init and filename handlers differ
+ * from the Win32 descriptor.
+ */
+struct ofmt of_coff = {
+ "COFF (i386) object files (e.g. DJGPP for DOS)",
+ "coff",
+ coff_std_init,
+ coff_out,
+ coff_deflabel,
+ coff_section_names,
+ coff_segbase,
+ coff_directives,
+ coff_std_filename,
+ coff_cleanup
+};
+
+#endif
+
+#ifdef OF_WIN32
+
+/*
+ * Driver descriptor for Win32 object output.  Entries, in order: the
+ * full description, the short format name, then the init, output,
+ * label-definition, section-name, segment-base, directive, filename
+ * and cleanup handlers.  Only the init and filename handlers differ
+ * from the plain COFF descriptor.
+ */
+struct ofmt of_win32 = {
+ "Microsoft Win32 (i386) object files",
+ "win32",
+ coff_win32_init,
+ coff_out,
+ coff_deflabel,
+ coff_section_names,
+ coff_segbase,
+ coff_directives,
+ coff_win32_filename,
+ coff_cleanup
+};
+
+#endif
diff --git a/outdbg.c b/outdbg.c
new file mode 100644
index 0000000..a55d3db
--- /dev/null
+++ b/outdbg.c
@@ -0,0 +1,138 @@
+/* outdbg.c output routines for the Netwide Assembler to produce
+ * a debugging trace
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_DBG
+
+/* Trace output stream and error callback, both stored by dbg_init(). */
+/* NOTE(review): these have external linkage, unlike the other drivers' */
+/* file-static state -- confirm nothing else in the program needs them. */
+FILE *dbgf;
+efunc dbgef;
+
+/* Segment ids handed out by seg_alloc() for .text, .data and .bss. */
+int segcode,segdata,segbss;
+
+/*
+ * Initialise the debug driver: remember the output stream and the
+ * error callback, allocate one segment id each for code, data and bss
+ * (in that order) and print a banner line listing the three ids.
+ * `ldef' is not used.
+ */
+static void dbg_init(FILE *fp, efunc errfunc, ldfunc ldef)
+{
+    (void) ldef;
+
+    dbgef = errfunc;
+    dbgf = fp;
+
+    segcode = seg_alloc();
+    segdata = seg_alloc();
+    segbss = seg_alloc();
+
+    fprintf(dbgf,
+            "NASM Output format debug dump - code=%d,data=%d,bss=%d\n",
+            segcode, segdata, segbss);
+}
+
+/*
+ * Finish the trace: close the output stream.  The result of fclose()
+ * is discarded.
+ */
+static void dbg_cleanup(void)
+{
+    FILE *out = dbgf;
+
+    fclose(out);
+}
+
+/*
+ * Map a section name to the segment id allocated for it in dbg_init().
+ *
+ *   name - ".text", ".data" or ".bss"; NULL asks for the default
+ *          section
+ *   pass - not used
+ *   bits - written only for the default request, which sets it to 16
+ *
+ * Returns the segment id, or NO_SEG for an unknown name.  The default
+ * request returns the code segment id, like the ELF driver's
+ * elf_section_names(); this used to be a literal 0, which is only
+ * right while the code segment happens to get id 0.
+ */
+static long dbg_section_names (char *name, int pass, int *bits)
+{
+    (void) pass;
+
+    /*
+     * We must have an initial default: let's make it 16 bits, in the
+     * code segment.
+     */
+    if (!name) {
+        *bits = 16;
+        return segcode;
+    }
+
+    if (!strcmp(name, ".text"))
+        return segcode;
+    else if (!strcmp(name, ".data"))
+        return segdata;
+    else if (!strcmp(name, ".bss"))
+        return segbss;
+    else
+        return NO_SEG;
+}
+
+/*
+ * Trace one label definition: name, segment:offset in hex, whether it
+ * is global or local, and the raw is_global value.
+ */
+static void dbg_deflabel (char *name, long segment, long offset,
+                          int is_global)
+{
+    const char *scope;
+
+    if (is_global)
+        scope = "global";
+    else
+        scope = "local";
+
+    fprintf(dbgf, "deflabel %s := %08lx:%08lx %s (%d)\n",
+            name, segment, offset, scope, is_global);
+}
+
+/*
+ * Trace one item passed to the output stage.
+ *
+ *   segto   - destination segment id
+ *   data    - payload pointer; how it is read depends on the type
+ *   type    - OUT_* type combined with the byte count
+ *   segment - segment the value refers to
+ *   wrt     - WRT base, printed for OUT_ADDRESS only
+ *
+ * A common prefix line fragment is printed first, then one
+ * type-specific description terminated by a newline.
+ */
+static void dbg_out (long segto, void *data, unsigned long type,
+                     long segment, long wrt)
+{
+    long nbytes = type & OUT_SIZMASK;
+    unsigned long kind = type & OUT_TYPMASK;
+
+    fprintf(dbgf, "out to %lx, len = %ld: ", segto, nbytes);
+
+    if (kind == OUT_RESERVE) {
+        fprintf(dbgf, "reserved.\n");
+    } else if (kind == OUT_RAWDATA) {
+        const unsigned char *bytes = (unsigned char *)data;
+        long i;
+
+        fprintf(dbgf, "raw data = ");
+        for (i = 0; i < nbytes; i++) {
+            int b = bytes[i];
+            fprintf(dbgf, "%02x ", b);
+        }
+        fprintf(dbgf, "\n");
+    } else if (kind == OUT_ADDRESS) {
+        long value = 0;            /* stays 0 for any other length */
+
+        /* the value is read at the width given by the byte count */
+        if (nbytes == 1)
+            value = *((char *)data);
+        else if (nbytes == 2)
+            value = *((short *)data);
+        else if (nbytes == 4)
+            value = *((long *)data);
+        fprintf(dbgf, "addr %08lx (seg %08lx, wrt %08lx)\n", value,
+                segment, wrt);
+    } else if (kind == OUT_REL2ADR) {
+        fprintf(dbgf, "rel2adr %04x (seg %08lx)\n",
+                (int)*(short *)data, segment);
+    } else if (kind == OUT_REL4ADR) {
+        fprintf(dbgf, "rel4adr %08lx (seg %08lx)\n",
+                *(long *)data, segment);
+    } else {
+        fprintf(dbgf, "unknown\n");
+    }
+}
+
+/*
+ * Segment-base hook: the debug driver hands the segment id back
+ * unchanged.
+ */
+static long dbg_segbase(long segment)
+{
+    long base = segment;
+
+    return base;
+}
+
+/*
+ * Directive hook for the debug driver.  The arguments are ignored and
+ * 0 is returned every time (presumably "not recognised" -- confirm
+ * against the caller).
+ */
+static int dbg_directive (char *directive, char *value, int pass)
+{
+    (void) directive;
+    (void) value;
+    (void) pass;
+
+    return 0;
+}
+
+/*
+ * Filename hook: derive the output name from the input name with a
+ * ".dbg" extension, leaving all the work to standard_extension().
+ */
+static void dbg_filename (char *inname, char *outname, efunc error)
+{
+    static char dbg_ext[] = ".dbg";
+
+    standard_extension (inname, outname, dbg_ext, error);
+}
+
+/*
+ * Driver descriptor for the debug trace format.  Entries, in order:
+ * the full description, the short format name, then the init, output,
+ * label-definition, section-name, segment-base, directive, filename
+ * and cleanup handlers.
+ */
+struct ofmt of_dbg = {
+ "Trace of all info passed to output stage",
+ "dbg",
+ dbg_init,
+ dbg_out,
+ dbg_deflabel,
+ dbg_section_names,
+ dbg_segbase,
+ dbg_directive,
+ dbg_filename,
+ dbg_cleanup
+};
+
+#endif /* OF_DBG */
diff --git a/outelf.c b/outelf.c
new file mode 100644
index 0000000..b84bae3
--- /dev/null
+++ b/outelf.c
@@ -0,0 +1,620 @@
+/* outelf.c output routines for the Netwide Assembler to produce
+ * ELF32 (i386 of course) object file format
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_ELF
+
+/* One queued relocation; each section keeps these in a singly linked list. */
+struct Reloc {
+ struct Reloc *next;
+ long address; /* relative to _start_ of section */
+ long symbol; /* 2..5 for the fixed section/absolute symbols, else
+ * GLOBAL_TEMP_BASE + global ordinal (see elf_add_reloc) */
+ int relative; /* TRUE: written as type 2; FALSE: type 1 */
+};
+
+/* One symbol recorded by elf_deflabel(), stored in the `syms' array. */
+struct Symbol {
+ long strpos; /* string table position of name */
+ long section; /* ELF section index 1..3, or SHN_ABS, SHN_COMMON,
+ * SHN_UNDEF */
+ int type; /* SYM_GLOBAL or 0 (not a plain boolean) */
+ long value; /* address, or COMMON variable size */
+};
+
+/* State kept for each section that holds bytes (.text and .data). */
+struct Section {
+ struct SAA *data; /* section contents accumulated so far */
+ /* len: bytes written so far; nrelocs: relocations queued so far.
+ * `size' is zeroed in elf_init() and not otherwise used in the
+ * visible code -- NOTE(review): confirm whether it is still needed. */
+ unsigned long len, size, nrelocs;
+ long index; /* NASM segment id from seg_alloc() */
+ struct Reloc *head, **tail; /* relocation list and its append point */
+};
+
+static struct Section stext, sdata;
+static unsigned long bsslen;
+static long bssindex;
+
+static struct SAA *syms;
+static unsigned long nlocals, nglobs;
+
+static struct RAA *bsym;
+
+static struct SAA *strs;
+static unsigned long strslen;
+
+static FILE *elffp;
+static efunc error;
+
+static char elf_module[FILENAME_MAX];
+
+#define SHN_ABS 0xFFF1
+#define SHN_COMMON 0xFFF2
+#define SHN_UNDEF 0
+
+#define SYM_SECTION 0x04
+#define SYM_GLOBAL 0x10
+
+#define GLOBAL_TEMP_BASE 6 /* bigger than any constant sym id */
+
+#define SEG_ALIGN 16 /* alignment of sections in file */
+#define SEG_ALIGN_1 (SEG_ALIGN-1)
+
+static const char align_str[SEG_ALIGN] = ""; /* ANSI will pad this with 0s */
+
+#define ELF_MAX_SECTIONS 16 /* really 10, but let's play safe */
+static struct ELF_SECTDATA {
+ void *data;
+ long len;
+ int is_saa;
+} elf_sects[ELF_MAX_SECTIONS];
+static int elf_nsect;
+static long elf_foffs;
+
+static void elf_write(void);
+static void elf_sect_write(struct Section *, unsigned char *, unsigned long);
+static void elf_section_header (int, int, int, void *, int, long,
+ int, int, int, int);
+static void elf_write_sections (void);
+static struct SAA *elf_build_symtab (long *, long *);
+static struct SAA *elf_build_reltab (long *, struct Reloc *);
+
+/*
+ * Initialise the ELF driver: remember the output stream and error
+ * callback, create empty .text and .data sections, allocate segment
+ * ids for .text, .data and .bss (in that order), create the symbol
+ * containers, and start the string table with an empty string
+ * followed by the module name held in elf_module.  `ldef' is unused.
+ */
+static void elf_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    (void) ldef;               /* placate optimisers */
+
+    elffp = fp;
+    error = errfunc;
+
+    /* .text */
+    stext.data = saa_init(1L);
+    stext.head = NULL;
+    stext.tail = &stext.head;
+    stext.len = 0;
+    stext.size = 0;
+    stext.nrelocs = 0;
+
+    /* .data */
+    sdata.data = saa_init(1L);
+    sdata.head = NULL;
+    sdata.tail = &sdata.head;
+    sdata.len = 0;
+    sdata.size = 0;
+    sdata.nrelocs = 0;
+
+    /* .bss has a length only */
+    bsslen = 0;
+
+    stext.index = seg_alloc();
+    sdata.index = seg_alloc();
+    bssindex = seg_alloc();
+
+    syms = saa_init((long)sizeof(struct Symbol));
+    nlocals = 0;
+    nglobs = 0;
+    bsym = raa_init();
+
+    /* string table: "" at offset 0, module name at offset 1 */
+    strs = saa_init(1L);
+    saa_wbytes (strs, "\0", 1L);
+    saa_wbytes (strs, elf_module, (long)(strlen(elf_module)+1));
+    strslen = 2+strlen(elf_module);
+}
+
+/*
+ * Free every node of the relocation list starting at *headp, leaving
+ * *headp NULL.
+ */
+static void elf_free_relocs (struct Reloc **headp) {
+    while (*headp) {
+        struct Reloc *dead = *headp;
+        *headp = dead->next;
+        nasm_free (dead);
+    }
+}
+
+/*
+ * Finish the output: write the object file, close the stream, then
+ * release the section buffers, relocation lists, symbol containers and
+ * the string table.
+ */
+static void elf_cleanup(void) {
+    elf_write();
+    fclose (elffp);
+
+    saa_free (stext.data);
+    elf_free_relocs (&stext.head);
+
+    saa_free (sdata.data);
+    elf_free_relocs (&sdata.head);
+
+    saa_free (syms);
+    raa_free (bsym);
+    saa_free (strs);
+}
+
+/*
+ * Map a section name to its NASM segment id.
+ *
+ *   name - ".text", ".data" or ".bss"; NULL asks for the default
+ *          section
+ *   pass - not used
+ *   bits - written only for the default request, which sets it to 32
+ *
+ * Returns the segment id; the default is .text and an unknown name
+ * gives NO_SEG.
+ */
+static long elf_section_names (char *name, int pass, int *bits) {
+    (void) pass;
+
+    if (name == NULL) {
+        *bits = 32;            /* default is 32 bits */
+        return stext.index;
+    }
+
+    if (strcmp(name, ".text") == 0)
+        return stext.index;
+    if (strcmp(name, ".data") == 0)
+        return sdata.index;
+    if (strcmp(name, ".bss") == 0)
+        return bssindex;
+
+    return NO_SEG;
+}
+
+/*
+ * Record one label as an ELF symbol.
+ *
+ *   name      - label name; names starting with ".." are ignored
+ *   segment   - NASM segment id the label lives in, or NO_SEG
+ *   offset    - offset within the segment (the size, for is_global 2)
+ *   is_global - 0 local, non-zero global; 2 marks a COMMON variable
+ *
+ * The name is appended to the string table and a Symbol is appended
+ * to `syms'.  Global symbols whose section is undefined or COMMON are
+ * also entered in `bsym', keyed by segment id, with their ordinal
+ * among the globals.
+ */
+static void elf_deflabel (char *name, long segment, long offset,
+                          int is_global) {
+    struct Symbol *sym;
+    int pos = strslen;
+    long namebytes;
+    long section;
+
+    if (name[0] == '.' && name[1] == '.')
+        return;
+
+    namebytes = (long)(1+strlen(name));
+    saa_wbytes (strs, name, namebytes);
+    strslen += namebytes;
+
+    /* which ELF section does the NASM segment correspond to? */
+    if (segment == NO_SEG)
+        section = SHN_ABS;
+    else if (segment == stext.index)
+        section = 1;
+    else if (segment == sdata.index)
+        section = 2;
+    else if (segment == bssindex)
+        section = 3;
+    else
+        section = SHN_UNDEF;
+
+    sym = saa_wstruct (syms);
+    sym->strpos = pos;
+    sym->type = is_global ? SYM_GLOBAL : 0;
+
+    if (is_global == 2) {
+        /* COMMON: the value carries the size */
+        sym->section = SHN_COMMON;
+        sym->value = offset;
+    } else {
+        sym->section = section;
+        sym->value = (section == SHN_UNDEF ? 0 : offset);
+    }
+
+    if (sym->type != SYM_GLOBAL) {
+        nlocals++;
+        return;
+    }
+
+    if (sym->section == SHN_UNDEF || sym->section == SHN_COMMON)
+        bsym = raa_write (bsym, segment, nglobs);
+    nglobs++;
+}
+
+/*
+ * Queue a relocation at the current end of `sect'.
+ *
+ *   sect     - section the relocated field is about to be written to
+ *   segment  - NASM segment id the field refers to, or NO_SEG
+ *   relative - TRUE for a relative relocation, FALSE for absolute
+ *
+ * The symbol is one of the fixed indices (5 absolute, 2 .text,
+ * 3 .data, 4 .bss) or GLOBAL_TEMP_BASE plus the ordinal stored in
+ * `bsym' for that segment; elf_build_reltab() turns the latter into a
+ * final symbol index.
+ */
+static void elf_add_reloc (struct Section *sect, long segment,
+                           int relative) {
+    struct Reloc *rel = nasm_malloc(sizeof(struct Reloc));
+
+    /* append to the section's list */
+    rel->next = NULL;
+    *sect->tail = rel;
+    sect->tail = &rel->next;
+
+    rel->address = sect->len;
+
+    if (segment == NO_SEG)
+        rel->symbol = 5;
+    else if (segment == stext.index)
+        rel->symbol = 2;
+    else if (segment == sdata.index)
+        rel->symbol = 3;
+    else if (segment == bssindex)
+        rel->symbol = 4;
+    else
+        rel->symbol = GLOBAL_TEMP_BASE + raa_read(bsym, segment);
+
+    rel->relative = relative;
+
+    sect->nrelocs++;
+}
+
+/*
+ * Output-driver entry point: append one item of assembled output to
+ * the section identified by `segto'.
+ *
+ *   segto   - NASM segment id the item belongs to (NO_SEG while in
+ *             absolute mode, where only OUT_RESERVE is accepted)
+ *   data    - payload pointer: raw bytes for OUT_RAWDATA, read as a
+ *             long for OUT_ADDRESS and OUT_REL4ADR
+ *   type    - OUT_* item type combined with the byte count (split
+ *             with OUT_TYPMASK / OUT_SIZMASK)
+ *   segment - segment the value refers to, or NO_SEG
+ *   wrt     - WRT base; anything but NO_SEG is reported and ignored
+ *
+ * All problems are reported through error(); nothing is returned.
+ */
+static void elf_out (long segto, void *data, unsigned long type,
+                     long segment, long wrt) {
+    struct Section *s;
+    long realbytes = type & OUT_SIZMASK;
+    unsigned char mydata[4], *p;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by ELF output format");
+    }
+
+    type &= OUT_TYPMASK;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if (type != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    /*
+     * Pick the destination section; NULL stands for .bss, which has
+     * a length but no contents.
+     */
+    if (segto == stext.index)
+        s = &stext;
+    else if (segto == sdata.index)
+        s = &sdata;
+    else if (segto == bssindex)
+        s = NULL;
+    else {
+        /* segto is a long, so it must be printed with %ld */
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %ld: defaulting to `.text'", segto);
+        s = &stext;
+    }
+
+    /*
+     * Anything other than a reservation in .bss is dropped, but the
+     * space it would have taken is still counted.
+     */
+    if (!s && type != OUT_RESERVE) {
+        error(ERR_WARNING, "attempt to initialise memory in the"
+              " BSS section: ignored");
+        if (type == OUT_REL2ADR)
+            realbytes = 2;
+        else if (type == OUT_REL4ADR)
+            realbytes = 4;
+        bsslen += realbytes;
+        return;
+    }
+
+    if (type == OUT_RESERVE) {
+        if (s) {
+            error(ERR_WARNING, "uninitialised space declared in"
+                  " %s section: zeroing",
+                  (segto == stext.index ? "code" : "data"));
+            elf_sect_write (s, NULL, realbytes);
+        } else
+            bsslen += realbytes;
+    } else if (type == OUT_RAWDATA) {
+        if (segment != NO_SEG)
+            error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
+        elf_sect_write (s, data, realbytes);
+    } else if (type == OUT_ADDRESS) {
+        /*
+         * No WRT test is needed here: wrt was already reported and
+         * forced to NO_SEG at the top of the function.
+         */
+        if (segment != NO_SEG) {
+            if (segment % 2) {
+                error(ERR_NONFATAL, "ELF format does not support"
+                      " segment base references");
+            } else
+                elf_add_reloc (s, segment, FALSE);
+        }
+        p = mydata;
+        if (realbytes == 2 && segment != NO_SEG)
+            error (ERR_NONFATAL, "ELF format does not support 16-bit"
+                   " relocations");
+        WRITELONG (p, *(long *)data);
+        elf_sect_write (s, mydata, realbytes);
+    } else if (type == OUT_REL2ADR) {
+        error (ERR_NONFATAL, "ELF format does not support 16-bit"
+               " relocations");
+    } else if (type == OUT_REL4ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL4ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "ELF format does not support"
+                  " segment base references");
+        } else
+            elf_add_reloc (s, segment, TRUE);
+        p = mydata;
+        /* the stored value has the byte count subtracted from it */
+        WRITELONG (p, *(long*)data - realbytes);
+        elf_sect_write (s, mydata, 4L);
+    }
+}
+
+/*
+ * Write the complete object file to elffp: the ELF header padded to
+ * 0x40 bytes, the section header table, padding up to the next
+ * SEG_ALIGN boundary, and then the contents of every section that has
+ * any, each padded to a SEG_ALIGN boundary.
+ *
+ * The order of the elf_section_header() calls fixes the section
+ * indices: 0 null, 1 .text, 2 .data, 3 .bss, 4 .comment, 5 .shstrtab,
+ * 6 .symtab, 7 .strtab, then .rel.text and .rel.data when present.
+ * The literal indices used below (5 in the file header, 7 and 6 as
+ * link values, 1 and 2 as info values) rely on that order.
+ */
+static void elf_write(void) {
+ int nsections, align;
+ char shstrtab[80], *p;
+ int shstrtablen, commlen;
+ char comment[64];
+
+ struct SAA *symtab, *reltext, *reldata;
+ long symtablen, symtablocal, reltextlen, reldatalen;
+
+ /*
+ * Work out how many sections we will have.
+ *
+ * Fixed sections are:
+ * SHN_UNDEF .text .data .bss .comment .shstrtab .symtab .strtab
+ *
+ * Optional sections are:
+ * .rel.text .rel.data
+ *
+ * (.rel.bss makes very little sense;-)
+ */
+ nsections = 8;
+ /*
+ * Build the section-name string table: an empty string at offset 0,
+ * then each name with its terminating NUL.  With both optional
+ * names present it needs 73 of the 80 bytes available.
+ */
+ *shstrtab = '\0';
+ shstrtablen = 1;
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".text");
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".data");
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".bss");
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".comment");
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".shstrtab");
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".symtab");
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".strtab");
+ if (stext.head) {
+ nsections++;
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".rel.text");
+ }
+ if (sdata.head) {
+ nsections++;
+ shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".rel.data");
+ }
+
+ /*
+ * Do the comment.
+ */
+ /* Layout: a leading NUL, the text, a trailing NUL; hence the +2. */
+ /* NOTE(review): the text is not bounded against comment[64]; this */
+ /* relies on NASM_VER being short -- confirm. */
+ *comment = '\0';
+ commlen = 2+sprintf(comment+1, "The Netwide Assembler %s", NASM_VER);
+
+ /*
+ * Output the ELF header.
+ */
+ /* The fields below add up to 0x34 bytes; three zero longs at the */
+ /* end pad the header out to 0x40. */
+ fwrite ("\177ELF\1\1\1\0\0\0\0\0\0\0\0\0", 16, 1, elffp);
+ fwriteshort (1, elffp); /* ET_REL relocatable file */
+ fwriteshort (3, elffp); /* EM_386 processor ID */
+ fwritelong (1L, elffp); /* EV_CURRENT file format version */
+ fwritelong (0L, elffp); /* no entry point */
+ fwritelong (0L, elffp); /* no program header table */
+ fwritelong (0x40L, elffp); /* section headers straight after
+ * ELF header plus alignment */
+ fwritelong (0L, elffp); /* 386 defines no special flags */
+ fwriteshort (0x34, elffp); /* size of ELF header */
+ fwriteshort (0, elffp); /* no program header table, again */
+ fwriteshort (0, elffp); /* still no program header table */
+ fwriteshort (0x28, elffp); /* size of section header */
+ fwriteshort (nsections, elffp); /* number of sections */
+ fwriteshort (5, elffp); /* string table section index for
+ * section header table */
+ fwritelong (0L, elffp); /* align to 0x40 bytes */
+ fwritelong (0L, elffp);
+ fwritelong (0L, elffp);
+
+ /*
+ * Build the symbol table and relocation tables.
+ */
+ /* Each relocation table is NULL when its section has none. */
+ symtab = elf_build_symtab (&symtablen, &symtablocal);
+ reltext = elf_build_reltab (&reltextlen, stext.head);
+ reldata = elf_build_reltab (&reldatalen, sdata.head);
+
+ /*
+ * Now output the section header table.
+ */
+
+ /* elf_foffs tracks the file offset of the next section's contents: */
+ /* it starts just past the header table, rounded up to SEG_ALIGN, */
+ /* and elf_section_header() advances it for each section with data. */
+ elf_foffs = 0x40 + 0x28 * nsections;
+ align = ((elf_foffs+SEG_ALIGN_1) & ~SEG_ALIGN_1) - elf_foffs;
+ elf_foffs += align;
+ elf_nsect = 0;
+
+ /* Arguments: name offset, type, flags, data, is_saa, length, link, */
+ /* info, alignment, entry size.  `p' walks shstrtab one name at a */
+ /* time, in the same order the names were written above. */
+ elf_section_header (0, 0, 0, NULL, FALSE, 0L, 0, 0, 0, 0); /* SHN_UNDEF */
+ p = shstrtab+1;
+ elf_section_header (p - shstrtab, 1, 6, stext.data, TRUE,
+ stext.len, 0, 0, 16, 0); /* .text */
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 1, 3, sdata.data, TRUE,
+ sdata.len, 0, 0, 4, 0); /* .data */
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 8, 3, NULL, TRUE,
+ bsslen, 0, 0, 4, 0); /* .bss */
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 1, 0, comment, FALSE,
+ (long)commlen, 0, 0, 1, 0);/* .comment */
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 3, 0, shstrtab, FALSE,
+ (long)shstrtablen, 0, 0, 1, 0);/* .shstrtab */
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 2, 0, symtab, TRUE,
+ symtablen, 7, symtablocal, 4, 16);/* .symtab */
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 3, 0, strs, TRUE,
+ strslen, 0, 0, 1, 0); /* .strtab */
+ if (reltext) {
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 9, 0, reltext, TRUE,
+ reltextlen, 6, 1, 4, 8); /* .rel.text */
+ }
+ if (reldata) {
+ p += strlen(p)+1;
+ elf_section_header (p - shstrtab, 9, 0, reldata, TRUE,
+ reldatalen, 6, 2, 4, 8); /* .rel.data */
+ }
+
+ /* pad from the end of the header table to the first section */
+ fwrite (align_str, align, 1, elffp);
+
+ /*
+ * Now output the sections.
+ */
+ elf_write_sections();
+
+ saa_free (symtab);
+ if (reltext)
+ saa_free (reltext);
+ if (reldata)
+ saa_free (reldata);
+}
+
+/*
+ * Append one 16-byte symbol table entry to `tab': three longs (name
+ * offset in the string table, value, size) followed by two shorts
+ * (info, section).
+ */
+static void elf_symtab_entry (struct SAA *tab, long strpos, long value,
+                              long size, int info, long section) {
+    unsigned char entry[16], *p = entry;
+
+    WRITELONG (p, strpos);
+    WRITELONG (p, value);
+    WRITELONG (p, size);
+    WRITESHORT (p, info);
+    WRITESHORT (p, section);
+    saa_wbytes (tab, entry, 16L);
+}
+
+/*
+ * Build the contents of the symbol table.
+ *
+ *   len   - receives the table size in bytes
+ *   local - receives the number of entries that precede the first
+ *           global symbol
+ *
+ * Entry order: the all-zero entry, the file-name entry, four section
+ * entries (sections 1..3 and SHN_ABS), every local symbol, then every
+ * global symbol.  Returns a newly created SAA holding the table.
+ */
+static struct SAA *elf_build_symtab (long *len, long *local) {
+    struct SAA *tab = saa_init(1L);
+    struct Symbol *sym;
+    long nbytes = 0, nlocal = 0;
+    int sect, want_global;
+
+    /*
+     * First, an all-zeros entry, required by the ELF spec.
+     */
+    saa_wbytes (tab, NULL, 16L);
+    nbytes += 16;
+    nlocal++;
+
+    /*
+     * The file name: it is the first thing in the string table, at
+     * offset 1.  Info 4 is type FILE.
+     */
+    elf_symtab_entry (tab, 1, 0, 0, 4, SHN_ABS);
+    nbytes += 16;
+    nlocal++;
+
+    /*
+     * Four nameless local section-type entries (info 3), used as
+     * relocation targets; the fourth stands for SHN_ABS.
+     */
+    for (sect = 1; sect <= 4; sect++) {
+        elf_symtab_entry (tab, 0, 0, 0, 3, (sect==4 ? SHN_ABS : sect));
+        nbytes += 16;
+        nlocal++;
+    }
+
+    /*
+     * Two sweeps over the recorded symbols: locals first (info 0),
+     * then globals (info SYM_GLOBAL).  Only locals are counted in
+     * the local total.  COMMON symbols repeat their value as size.
+     */
+    for (want_global = 0; want_global <= 1; want_global++) {
+        saa_rewind (syms);
+        while ( (sym = saa_rstruct (syms)) != NULL ) {
+            int is_glob = (sym->type == SYM_GLOBAL);
+            long size = 0;
+
+            if (is_glob != want_global)
+                continue;
+            if (sym->section == SHN_COMMON)
+                size = sym->value;
+
+            elf_symtab_entry (tab, sym->strpos, sym->value, size,
+                              is_glob ? SYM_GLOBAL : 0, sym->section);
+            nbytes += 16;
+            if (!is_glob)
+                nlocal++;
+        }
+    }
+
+    *len = nbytes;
+    *local = nlocal;
+    return tab;
+}
+
+/*
+ * Build the contents of a relocation section from the list `r'.
+ *
+ *   len - receives the table size in bytes; left untouched when the
+ *         list is empty
+ *   r   - head of a section's relocation list
+ *
+ * Returns a newly created SAA holding one 8-byte entry per list node,
+ * or NULL for an empty list.
+ */
+static struct SAA *elf_build_reltab (long *len, struct Reloc *r) {
+    struct SAA *tab;
+    long nbytes = 0;
+
+    if (r == NULL)
+        return NULL;
+
+    tab = saa_init(1L);
+
+    for (; r != NULL; r = r->next) {
+        unsigned char entry[8], *p = entry;
+        long symidx = r->symbol;
+
+        /*
+         * Temporary global ids become final symbol table indices:
+         * globals follow the 6 fixed entries and all the locals.
+         */
+        if (symidx >= GLOBAL_TEMP_BASE)
+            symidx = symidx - GLOBAL_TEMP_BASE + 6 + nlocals;
+
+        WRITELONG (p, r->address);
+        /* symbol index in the upper bits, relocation type in the low byte */
+        WRITELONG (p, (symidx << 8) + (r->relative ? 2 : 1));
+        saa_wbytes (tab, entry, 8L);
+        nbytes += 8;
+    }
+
+    *len = nbytes;
+    return tab;
+}
+
+/*
+ * Write one 0x28-byte section header to elffp and remember the
+ * section's contents for elf_write_sections().
+ *
+ *   name    - offset of the section name in the section-name table
+ *   type    - section type; type 0 is written with file offset 0
+ *   flags   - section flags
+ *   data    - contents (an SAA or a plain buffer), or NULL for none
+ *   is_saa  - TRUE when `data' is an SAA
+ *   datalen - length of the contents in bytes
+ *   link, info, align, eltsize - written through as given
+ *
+ * A section with data advances elf_foffs by its length rounded up to
+ * SEG_ALIGN.
+ */
+static void elf_section_header (int name, int type, int flags,
+                                void *data, int is_saa, long datalen,
+                                int link, int info, int align, int eltsize) {
+    struct ELF_SECTDATA *slot = &elf_sects[elf_nsect];
+    long offset = 0L;
+
+    slot->data = data;
+    slot->len = datalen;
+    slot->is_saa = is_saa;
+    elf_nsect++;
+
+    if (type != 0)
+        offset = elf_foffs;
+
+    fwritelong ((long)name, elffp);
+    fwritelong ((long)type, elffp);
+    fwritelong ((long)flags, elffp);
+    fwritelong (0L, elffp);    /* no address, ever, in object files */
+    fwritelong (offset, elffp);
+    fwritelong (datalen, elffp);
+    fwritelong ((long)link, elffp);
+    fwritelong ((long)info, elffp);
+    fwritelong ((long)align, elffp);
+    fwritelong ((long)eltsize, elffp);
+
+    /* reserve file space for this section's contents, if it has any */
+    if (data)
+        elf_foffs += (datalen+SEG_ALIGN_1) & ~SEG_ALIGN_1;
+}
+
+/*
+ * Write the contents of every section recorded by
+ * elf_section_header() that has data, in recording order, each
+ * followed by zero bytes up to the next SEG_ALIGN boundary.
+ */
+static void elf_write_sections (void) {
+    int n;
+
+    for (n = 0; n < elf_nsect; n++) {
+        struct ELF_SECTDATA *sec = &elf_sects[n];
+        long len, padding;
+
+        if (!sec->data)
+            continue;
+
+        len = sec->len;
+        padding = ((len+SEG_ALIGN_1) & ~SEG_ALIGN_1) - len;
+
+        if (sec->is_saa)
+            saa_fpwrite (sec->data, elffp);
+        else
+            fwrite (sec->data, len, 1, elffp);
+        fwrite (align_str, padding, 1, elffp);
+    }
+}
+
+/*
+ * Append `len' bytes taken from `data' to the section's buffer and
+ * grow the recorded section length by the same amount.  `data' is
+ * handed to saa_wbytes() unchanged; elf_out() passes NULL here when
+ * it announces that it is zeroing reserved space.
+ */
+static void elf_sect_write (struct Section *sect,
+                            unsigned char *data, unsigned long len)
+{
+    struct SAA *buf = sect->data;
+
+    sect->len += len;
+    saa_wbytes (buf, data, len);
+}
+
+/*
+ * Segment-base hook: the ELF driver hands the segment id back
+ * unchanged.
+ */
+static long elf_segbase (long segment)
+{
+    long base = segment;
+
+    return base;
+}
+
+/*
+ * Directive hook for the ELF driver.  The arguments are ignored and
+ * 0 is returned every time (presumably "not recognised" -- confirm
+ * against the caller).
+ */
+static int elf_directive (char *directive, char *value, int pass)
+{
+    (void) directive;
+    (void) value;
+    (void) pass;
+
+    return 0;
+}
+
+/*
+ * Filename hook: remember the input file name as the module name
+ * (elf_init() puts it in the string table) and derive the output name
+ * with a ".o" extension.
+ *
+ * The copy is limited to the size of elf_module and always
+ * NUL-terminated, so an over-long input name is cut off instead of
+ * overrunning the buffer.
+ */
+static void elf_filename (char *inname, char *outname, efunc error) {
+    strncpy (elf_module, inname, sizeof(elf_module) - 1);
+    elf_module[sizeof(elf_module) - 1] = '\0';
+    standard_extension (inname, outname, ".o", error);
+}
+
+/*
+ * Driver descriptor for ELF32 output.  Entries, in order: the full
+ * description, the short format name, then the init, output,
+ * label-definition, section-name, segment-base, directive, filename
+ * and cleanup handlers.
+ */
+struct ofmt of_elf = {
+ "ELF32 (i386) object files (e.g. Linux)",
+ "elf",
+ elf_init,
+ elf_out,
+ elf_deflabel,
+ elf_section_names,
+ elf_segbase,
+ elf_directive,
+ elf_filename,
+ elf_cleanup
+};
+
+#endif /* OF_ELF */
diff --git a/outform.c b/outform.c
new file mode 100644
index 0000000..154c63f
--- /dev/null
+++ b/outform.c
@@ -0,0 +1,42 @@
+/* outform.c manages a list of output formats, and associates
+ * them with their relevant drivers. Also has a
+ * routine to find the correct driver given a name
+ * for it
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "outform.h"
+
+static struct ofmt *drivers[MAX_OUTPUT_FORMATS];
+static int ndrivers = 0;
+
+/*
+ * Look up a registered output format by its short name.  The
+ * comparison is exact (strcmp).  Returns the first matching driver in
+ * registration order, or NULL when none matches.
+ */
+struct ofmt *ofmt_find(char *name)
+{
+    struct ofmt *found = NULL;
+    int n = 0;
+
+    while (n < ndrivers && !found) {
+        if (strcmp(name, drivers[n]->shortname) == 0)
+            found = drivers[n];
+        n++;
+    }
+
+    return found;
+}
+
+/*
+ * Print every registered output format to stderr, one per line, in
+ * registration order: a marker column ('*' for `deffmt', blank
+ * otherwise), the short name padded to 7 columns, and the full
+ * description.
+ */
+void ofmt_list(struct ofmt *deffmt)
+{
+    int n;
+
+    for (n = 0; n < ndrivers; n++) {
+        struct ofmt *fmt = drivers[n];
+        char marker = ' ';
+
+        if (fmt == deffmt)
+            marker = '*';
+
+        fprintf(stderr, " %c %-7s%s\n", marker,
+                fmt->shortname, fmt->fullname);
+    }
+}
+
+/*
+ * Add an output format driver to the list searched by ofmt_find() and
+ * printed by ofmt_list().
+ *
+ * The list holds at most MAX_OUTPUT_FORMATS entries.  A registration
+ * beyond that is reported on stderr and dropped instead of being
+ * written past the end of the array.  A NULL driver is dropped too,
+ * since both ofmt_find() and ofmt_list() dereference every entry.
+ */
+void ofmt_register (struct ofmt *info) {
+    if (!info)
+        return;
+
+    if (ndrivers >= MAX_OUTPUT_FORMATS) {
+        fprintf(stderr, "ofmt_register: more than %d output formats;"
+                " ignoring `%s'\n", MAX_OUTPUT_FORMATS, info->shortname);
+        return;
+    }
+
+    drivers[ndrivers++] = info;
+}
diff --git a/outform.h b/outform.h
new file mode 100644
index 0000000..48b8276
--- /dev/null
+++ b/outform.h
@@ -0,0 +1,167 @@
+/* outform.h header file for binding output format drivers to the
+ * remainder of the code in the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+/*
+ * This header file allows configuration of which output formats
+ * get compiled into the NASM binary. You can configure by defining
+ * various preprocessor symbols beginning with "OF_", either on the
+ * compiler command line or at the top of this file.
+ *
+ * OF_ONLY -- only include specified object formats
+ * OF_name -- ensure that output format 'name' is included
+ * OF_NO_name -- remove output format 'name'
+ * OF_DOS -- ensure that 'obj', 'bin' & 'win32' are included.
+ * OF_UNIX -- ensure that 'aout', 'coff' and 'elf' are in.
+ * OF_OTHERS -- ensure that 'bin', 'as86' & 'rdf' are in.
+ * OF_ALL -- ensure that all formats are included.
+ *
+ * OF_DEFAULT=of_name -- ensure that 'name' is the default format.
+ *
+ * eg: -DOF_UNIX -DOF_ELF -DOF_DEFAULT=of_elf would be a suitable config
+ * for an average linux system.
+ *
+ * Default config = -DOF_ALL -DOF_DEFAULT=of_bin
+ *
+ * You probably only want to set these options while compiling 'nasm.c'. */
+
+#ifndef NASM_OUTFORM_H
+#define NASM_OUTFORM_H
+
+#include "nasm.h"
+
+#define MAX_OUTPUT_FORMATS 16
+
+/* Find a registered driver by exact short name; NULL when not found. */
+struct ofmt *ofmt_find(char *name);
+/* List all registered drivers on stderr, marking `deffmt' with '*'. */
+void ofmt_list(struct ofmt *deffmt);
+/* Append a driver to the list of registered output formats. */
+void ofmt_register (struct ofmt *);
+
+/* -------------- USER MODIFIABLE PART ---------------- */
+
+/*
+ * Insert #defines here in accordance with the configuration
+ * instructions above.
+ *
+ * E.g.
+ *
+ * #define OF_ONLY
+ * #define OF_OBJ
+ * #define OF_BIN
+ *
+ * for a 16-bit DOS assembler with no extraneous formats.
+ */
+
+/* ------------ END USER MODIFIABLE PART -------------- */
+
+/* ====configurable info begins here==== */
+/* formats configurable:
+ * bin,obj,elf,aout,coff,win32,as86,rdf */
+
+/* process options... */
+
+#ifndef OF_ONLY
+#ifndef OF_ALL
+#define OF_ALL /* default is to have all formats */
+#endif
+#endif
+
+#ifdef OF_ALL /* set all formats on... */
+#ifndef OF_BIN
+#define OF_BIN
+#endif
+#ifndef OF_OBJ
+#define OF_OBJ
+#endif
+#ifndef OF_ELF
+#define OF_ELF
+#endif
+#ifndef OF_COFF
+#define OF_COFF
+#endif
+#ifndef OF_AOUT
+#define OF_AOUT
+#endif
+#ifndef OF_WIN32
+#define OF_WIN32
+#endif
+#ifndef OF_AS86
+#define OF_AS86
+#endif
+#ifndef OF_RDF
+#define OF_RDF
+#endif
+#endif /* OF_ALL */
+
+/* turn on groups of formats specified.... */
+#ifdef OF_DOS
+#ifndef OF_OBJ
+#define OF_OBJ
+#endif
+#ifndef OF_BIN
+#define OF_BIN
+#endif
+#ifndef OF_WIN32
+#define OF_WIN32
+#endif
+#endif
+
+#ifdef OF_UNIX
+#ifndef OF_AOUT
+#define OF_AOUT
+#endif
+#ifndef OF_COFF
+#define OF_COFF
+#endif
+#ifndef OF_ELF
+#define OF_ELF
+#endif
+#endif
+
+#ifdef OF_OTHERS
+#ifndef OF_BIN
+#define OF_BIN
+#endif
+#ifndef OF_AS86
+#define OF_AS86
+#endif
+#ifndef OF_RDF
+#define OF_RDF
+#endif
+#endif
+
+/* finally... override any format specifically specified to be off */
+#ifdef OF_NO_BIN
+#undef OF_BIN
+#endif
+#ifdef OF_NO_OBJ
+#undef OF_OBJ
+#endif
+#ifdef OF_NO_ELF
+#undef OF_ELF
+#endif
+#ifdef OF_NO_AOUT
+#undef OF_AOUT
+#endif
+#ifdef OF_NO_COFF
+#undef OF_COFF
+#endif
+#ifdef OF_NO_WIN32
+#undef OF_WIN32
+#endif
+#ifdef OF_NO_AS86
+#undef OF_AS86
+#endif
+#ifdef OF_NO_RDF
+#undef OF_RDF
+#endif
+
+#ifndef OF_DEFAULT
+#define OF_DEFAULT of_bin
+#endif
+
+#endif /* NASM_OUTFORM_H */
diff --git a/outobj.c b/outobj.c
new file mode 100644
index 0000000..b33b72d
--- /dev/null
+++ b/outobj.c
@@ -0,0 +1,1229 @@
+/* outobj.c output routines for the Netwide Assembler to produce
+ * Microsoft 16-bit .OBJ object files
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_OBJ
+
+/* Input file name copied in by obj_filename(); written as the THEADR name. */
+static char obj_infile[FILENAME_MAX];
+/* Set by the `uppercase' directive; makes obj_write_name() upper-case names. */
+static int obj_uppercase;
+
+/* Error callback, label-definition callback and output stream, all
+ * stored by obj_init(). */
+static efunc error;
+static ldfunc deflabel;
+static FILE *ofp;
+/* Segment id obtained from seg_alloc() in obj_init(); given to the first
+ * segment that obj_segment() creates. */
+static long first_seg;
+/* FALSE until obj_segment() has created at least one segment. */
+static int any_segs;
+
+#define LEDATA_MAX 1024 /* maximum size of LEDATA record */
+#define RECORD_MAX 1024 /* maximum size of _any_ record */
+#define GROUP_MAX 256 /* we won't _realistically_ have more
+ * than this many segs in a group */
+#define EXT_BLKSIZ 256 /* block size for externals list */
+
+static unsigned char record[RECORD_MAX], *recptr;
+
+/*
+ * A global label awaiting output as a PUBDEF entry. Entries are chained
+ * either from their owning Segment (pubhead/pubtail) or, for labels in
+ * a SEG_ABS segment, from the file-level fpubhead/fpubtail list.
+ * `name' is the pointer handed to obj_deflabel(); it is not copied.
+ */
+static struct Public {
+ struct Public *next;
+ char *name;
+ long offset;
+ long segment; /* only if it's far-absolute */
+} *fpubhead, **fpubtail;
+
+/*
+ * A label that does not belong to any of our segments (case (iii) in
+ * obj_deflabel). obj_write_file() emits an EXTDEF entry when
+ * `commonsize' is zero and a COMDEF record otherwise.
+ */
+static struct External {
+ struct External *next;
+ char *name;
+ long commonsize;
+} *exthead, **exttail;
+
+/* Number of External records created so far; obj_deflabel() stores the
+ * pre-incremented value as the external's index. */
+static int externals;
+
+/*
+ * Back-reference table from NASM segment values to external indices.
+ * Slot (segment/2) holds the index assigned in obj_deflabel(); the table
+ * is a chain of blocks of EXT_BLKSIZ slots each. Slots that were never
+ * assigned are left uninitialised.
+ */
+static struct ExtBack {
+ struct ExtBack *next;
+ int index[EXT_BLKSIZ];
+} *ebhead, **ebtail;
+
+/*
+ * One segment of the output file, created by obj_segment() and written
+ * out as a SEGDEF record by obj_write_file().
+ */
+static struct Segment {
+ struct Segment *next;
+ long index; /* the NASM segment id */
+ long obj_index; /* the OBJ-file segment index */
+ struct Group *grp; /* the group it belongs to */
+ /* running offset, advanced by obj_out(); written as the SEGDEF length */
+ long currentpos;
+ long align; /* can be SEG_ABS + absolute addr */
+ /* combination type; the value is shifted into the C field of the
+ * SEGDEF attribute byte */
+ enum {
+ CMB_PRIVATE = 0,
+ CMB_PUBLIC = 2,
+ CMB_STACK = 5,
+ CMB_COMMON = 6
+ } combine;
+ long use32; /* is this segment 32-bit? */
+ /* global labels defined inside this segment */
+ struct Public *pubhead, **pubtail;
+ /* filled in by obj_deflabel() while obj_seg_needs_update points here */
+ char *name;
+ char *segclass, *overlay; /* `class' is a C++ keyword :-) */
+} *seghead, **segtail, *obj_seg_needs_update;
+
+/*
+ * One segment group, created by the `group' directive and written out as
+ * a GRPDEF record. In `segs', the first `nindices' slots hold OBJ-file
+ * segment indices of members that are already defined; slots from
+ * `nindices' up to `nentries' hold strdup'ed names of members that have
+ * not been defined yet.
+ */
+static struct Group {
+ struct Group *next;
+ /* filled in by obj_deflabel() while obj_grp_needs_update points here */
+ char *name;
+ long index; /* NASM segment id */
+ long obj_index; /* OBJ-file group index */
+ long nentries; /* number of elements... */
+ long nindices; /* ...and number of index elts... */
+ union {
+ long index;
+ char *name;
+ } segs[GROUP_MAX]; /* ...in this */
+} *grphead, **grptail, *obj_grp_needs_update;
+
+/*
+ * One buffered LEDATA record together with the FIXUPP record that
+ * belongs to it. Records are queued by obj_ledata_new() and written by
+ * obj_write_file(); `datacurr' is the record currently being filled, or
+ * NULL after obj_ledata_commit().
+ */
+static struct ObjData {
+ struct ObjData *next;
+ /* set once obj_out() has stored data; empty records are not written */
+ int nonempty;
+ struct Segment *seg;
+ /* value of seg->currentpos when the record was started */
+ long startpos;
+ /* record type codes to use for the LEDATA and FIXUPP records */
+ int letype, ftype;
+ unsigned char ledata[LEDATA_MAX], *lptr;
+ unsigned char fixupp[RECORD_MAX], *fptr;
+} *datahead, *datacurr, **datatail;
+
+/* Segment and offset of the `..start' label; obj_entry_seg stays NO_SEG
+ * when no entry point was defined. */
+static long obj_entry_seg, obj_entry_ofs;
+
+/*
+ * Record type bytes. Elsewhere in this file the 32-bit form of a record
+ * is selected by adding 1 to the code given here (SEGDEF+1, LEDATA+1,
+ * PUBDEF+1, FIXUPP+1, MODEND+1).
+ */
+enum RecordID { /* record ID codes */
+
+ THEADR = 0x80, /* module header */
+ COMENT = 0x88, /* comment record */
+
+ LNAMES = 0x96, /* list of names */
+
+ SEGDEF = 0x98, /* segment definition */
+ GRPDEF = 0x9A, /* group definition */
+ EXTDEF = 0x8C, /* external definition */
+ PUBDEF = 0x90, /* public definition */
+ COMDEF = 0xB0, /* common definition */
+
+ LEDATA = 0xA0, /* logical enumerated data */
+ FIXUPP = 0x9C, /* fixups (relocations) */
+
+ MODEND = 0x8A /* module end */
+};
+
+extern struct ofmt of_obj;
+
+static long obj_ledata_space(struct Segment *);
+static int obj_fixup_free(struct Segment *);
+static void obj_ledata_new(struct Segment *);
+static void obj_ledata_commit(void);
+static void obj_write_fixup (struct ObjData *, int, int, long, long, long);
+static long obj_segment (char *, int, int *);
+static void obj_write_file(void);
+static unsigned char *obj_write_data(unsigned char *, unsigned char *, int);
+static unsigned char *obj_write_byte(unsigned char *, int);
+static unsigned char *obj_write_word(unsigned char *, int);
+static unsigned char *obj_write_dword(unsigned char *, long);
+static unsigned char *obj_write_rword(unsigned char *, int);
+static unsigned char *obj_write_name(unsigned char *, char *);
+static unsigned char *obj_write_index(unsigned char *, int);
+static unsigned char *obj_write_value(unsigned char *, unsigned long);
+static void obj_record(int, unsigned char *, unsigned char *);
+
+/*
+ * Prepare the OBJ driver for a new output file: store the stream and the
+ * two callbacks supplied by the caller, allocate the id for the first
+ * segment, and reset every list and flag to its empty state.
+ */
+static void obj_init (FILE *fp, efunc errfunc, ldfunc ldef) {
+    /* services supplied by the caller */
+    ofp = fp;
+    error = errfunc;
+    deflabel = ldef;
+
+    /* segments and groups */
+    first_seg = seg_alloc();
+    any_segs = FALSE;
+    seghead = NULL;
+    segtail = &seghead;
+    obj_seg_needs_update = NULL;
+    grphead = NULL;
+    grptail = &grphead;
+    obj_grp_needs_update = NULL;
+
+    /* far-absolute publics, externals and their back-references */
+    fpubhead = NULL;
+    fpubtail = &fpubhead;
+    exthead = NULL;
+    exttail = &exthead;
+    ebhead = NULL;
+    ebtail = &ebhead;
+    externals = 0;
+
+    /* buffered LEDATA/FIXUPP records */
+    datahead = NULL;
+    datacurr = NULL;
+    datatail = &datahead;
+
+    /* no entry point yet, names written in their original case */
+    obj_entry_seg = NO_SEG;
+    obj_uppercase = FALSE;
+}
+
+/*
+ * Finish the output file: write all buffered records, close the stream
+ * and release every structure the driver allocated.
+ *
+ * Name strings stored in Segment, Group, Public and External records
+ * are the pointers received by obj_deflabel() and are not freed here.
+ * The class and overlay names, however, were created with nasm_strdup()
+ * in obj_segment() and so must be released along with the segment.
+ */
+static void obj_cleanup (void) {
+    obj_write_file();
+    fclose (ofp);
+    while (seghead) {
+        struct Segment *segtmp = seghead;
+        seghead = seghead->next;
+        while (segtmp->pubhead) {
+            struct Public *pubtmp = segtmp->pubhead;
+            segtmp->pubhead = pubtmp->next;
+            nasm_free (pubtmp);
+        }
+        if (segtmp->segclass)
+            nasm_free (segtmp->segclass);
+        if (segtmp->overlay)
+            nasm_free (segtmp->overlay);
+        nasm_free (segtmp);
+    }
+    while (fpubhead) {
+        struct Public *pubtmp = fpubhead;
+        fpubhead = fpubhead->next;
+        nasm_free (pubtmp);
+    }
+    while (exthead) {
+        struct External *exttmp = exthead;
+        exthead = exthead->next;
+        nasm_free (exttmp);
+    }
+    while (ebhead) {
+        struct ExtBack *ebtmp = ebhead;
+        ebhead = ebhead->next;
+        nasm_free (ebtmp);
+    }
+    while (grphead) {
+        struct Group *grptmp = grphead;
+        grphead = grphead->next;
+        nasm_free (grptmp);
+    }
+    while (datahead) {
+        struct ObjData *datatmp = datahead;
+        datahead = datahead->next;
+        nasm_free (datatmp);
+    }
+}
+
+/*
+ * Label-definition hook: record whatever the output file needs to know
+ * about a newly defined label.
+ *
+ *   name       label text; the pointer itself is stored, not a copy
+ *   segment    NASM segment value the label is defined in
+ *   offset     offset of the label (or the size, for a common variable)
+ *   is_global  non-zero for a global label; the value 2 marks a common
+ *              variable whose size is passed in `offset'
+ */
+static void obj_deflabel (char *name, long segment,
+                          long offset, int is_global) {
+    /*
+     * We have three cases:
+     *
+     * (i) `segment' is a segment-base. If so, set the name field
+     * for the segment or group structure it refers to, and then
+     * return.
+     *
+     * (ii) `segment' is one of our segments, or a SEG_ABS segment.
+     * Save the label position for later output of a PUBDEF record.
+     * (Or a MODPUB, if we work out how.)
+     *
+     * (iii) `segment' is not one of our segments. Save the label
+     * position for later output of an EXTDEF, and also store a
+     * back-reference so that we can map later references to this
+     * segment number to the external index.
+     */
+    struct External *ext;
+    struct ExtBack *eb;
+    struct Segment *seg;
+    int i;
+
+    /*
+     * First check for the double-period, signifying something
+     * unusual.
+     */
+    if (name[0] == '.' && name[1] == '.') {
+        if (!strcmp(name, "..start")) {
+            obj_entry_seg = segment;
+            obj_entry_ofs = offset;
+        }
+        return;
+    }
+
+    /*
+     * Case (i):
+     */
+    if (obj_seg_needs_update) {
+        obj_seg_needs_update->name = name;
+        return;
+    } else if (obj_grp_needs_update) {
+        obj_grp_needs_update->name = name;
+        return;
+    }
+    if (segment < SEG_ABS && segment != NO_SEG && segment % 2)
+        return;
+
+    if (segment >= SEG_ABS) {
+        /*
+         * SEG_ABS subcase of (ii).
+         */
+        if (is_global) {
+            struct Public *pub;
+
+            pub = *fpubtail = nasm_malloc(sizeof(*pub));
+            fpubtail = &pub->next;
+            pub->next = NULL;
+            pub->name = name;
+            pub->offset = offset;
+            pub->segment = segment & ~SEG_ABS;
+        }
+        return;
+    }
+
+    for (seg = seghead; seg; seg = seg->next)
+        if (seg->index == segment) {
+            /*
+             * Case (ii). Maybe MODPUB someday?
+             */
+            if (is_global) {
+                struct Public *pub;
+
+                pub = *seg->pubtail = nasm_malloc(sizeof(*pub));
+                seg->pubtail = &pub->next;
+                pub->next = NULL;
+                pub->name = name;
+                pub->offset = offset;
+            }
+            return;
+        }
+
+    /*
+     * Case (iii).
+     */
+    ext = *exttail = nasm_malloc(sizeof(*ext));
+    ext->next = NULL;
+    exttail = &ext->next;
+    ext->name = name;
+    if (is_global == 2)
+        ext->commonsize = offset;
+    else
+        ext->commonsize = 0;
+
+    /*
+     * Store the back-reference in slot segment/2 of the ExtBack
+     * chain, extending the chain as far as necessary.
+     */
+    i = segment/2;
+    eb = ebhead;
+    if (!eb) {
+        eb = *ebtail = nasm_malloc(sizeof(*eb));
+        eb->next = NULL;
+        ebtail = &eb->next;
+    }
+    /*
+     * Valid slots in a block are 0..EXT_BLKSIZ-1, so the walk must
+     * continue while i >= EXT_BLKSIZ; stopping at i == EXT_BLKSIZ
+     * would store one element past the end of index[].
+     */
+    while (i >= EXT_BLKSIZ) {
+        if (eb && eb->next)
+            eb = eb->next;
+        else {
+            eb = *ebtail = nasm_malloc(sizeof(*eb));
+            eb->next = NULL;
+            ebtail = &eb->next;
+        }
+        i -= EXT_BLKSIZ;
+    }
+    eb->index[i] = ++externals;
+}
+
+/*
+ * Output hook: append one item of assembler output to segment `segto'.
+ *
+ *   segto    NASM id of the segment being written, or NO_SEG when
+ *            assembling in [ABSOLUTE] space
+ *   data     raw bytes for OUT_RAWDATA; a pointer to a long holding the
+ *            address for OUT_ADDRESS / OUT_REL2ADR / OUT_REL4ADR
+ *   type     output type (OUT_TYPMASK bits) combined with a size
+ *            (OUT_SIZMASK bits)
+ *   segment  segment value the address refers to, NO_SEG if none
+ *   wrt      frame override passed on to obj_write_fixup()
+ *
+ * Data is buffered in ObjData records; nothing is written to the file
+ * until obj_write_file() runs.
+ */
+static void obj_out (long segto, void *data, unsigned long type,
+                     long segment, long wrt) {
+    long size, realtype;
+    unsigned char *ucdata;
+    long ldata;
+    struct Segment *seg;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if ((type & OUT_TYPMASK) != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    /*
+     * If `any_segs' is still FALSE, we must define a default
+     * segment.
+     */
+    if (!any_segs) {
+        int tempint;                   /* ignored */
+        if (segto != obj_segment("__NASMDEFSEG", 2, &tempint))
+            error (ERR_PANIC, "strange segment conditions in OBJ driver");
+    }
+
+    /*
+     * Find the segment we are targetting.
+     */
+    for (seg = seghead; seg; seg = seg->next)
+        if (seg->index == segto)
+            break;
+    if (!seg)
+        error (ERR_PANIC, "code directed to nonexistent segment?");
+
+    size = type & OUT_SIZMASK;
+    realtype = type & OUT_TYPMASK;
+    if (realtype == OUT_RAWDATA) {
+        /* copy the bytes, starting a new LEDATA record whenever the
+         * current one is full or belongs to another segment */
+        ucdata = data;
+        while (size > 0) {
+            long len = obj_ledata_space(seg);
+            if (len == 0) {
+                obj_ledata_new(seg);
+                len = obj_ledata_space(seg);
+            }
+            if (len > size)
+                len = size;
+            datacurr->lptr = obj_write_data (datacurr->lptr, ucdata, len);
+            datacurr->nonempty = TRUE;
+            ucdata += len;
+            size -= len;
+            seg->currentpos += len;
+        }
+    } else if (realtype == OUT_ADDRESS || realtype == OUT_REL2ADR ||
+               realtype == OUT_REL4ADR) {
+        if (segment == NO_SEG && realtype != OUT_ADDRESS)
+            error(ERR_NONFATAL, "relative call to absolute address not"
+                  " supported by OBJ format");
+        if (segment >= SEG_ABS)
+            error(ERR_NONFATAL, "far-absolute relocations not supported"
+                  " by OBJ format");
+        ldata = *(long *)data;
+        if (realtype == OUT_REL2ADR)
+            ldata += (size-2);
+        if (realtype == OUT_REL4ADR)
+            ldata += (size-4);
+        /* NOTE(review): `size' is used unchanged below as the field
+         * width; this is only right if relative items always arrive
+         * with size 2 or 4 respectively - confirm against the caller. */
+        if (obj_ledata_space(seg) < 4 || !obj_fixup_free(seg))
+            obj_ledata_new(seg);
+        if (size == 2)
+            datacurr->lptr = obj_write_word (datacurr->lptr, ldata);
+        else
+            datacurr->lptr = obj_write_dword (datacurr->lptr, ldata);
+        datacurr->nonempty = TRUE;
+        if (segment != NO_SEG) {
+            /*
+             * Only a plain address is segment-relative (0x4000). Both
+             * relative forms must produce a self-relative fixup; the
+             * 32-bit one used to be flagged segment-relative as well.
+             */
+            int segrel = (realtype == OUT_ADDRESS ? 0x4000 : 0);
+
+            obj_write_fixup (datacurr, size, segrel, segment, wrt,
+                             (seg->currentpos - datacurr->startpos));
+        }
+        seg->currentpos += size;
+    } else if (realtype == OUT_RESERVE) {
+        /* reserved space carries no data: close the current record so
+         * that the next one starts at the new position */
+        obj_ledata_commit();
+        seg->currentpos += size;
+    }
+}
+
+/*
+ * Return the number of bytes still free in the current LEDATA buffer,
+ * or 0 if there is no current record or it belongs to another segment.
+ */
+static long obj_ledata_space(struct Segment *segto) {
+    long room = 0;
+
+    if (datacurr && datacurr->seg == segto)
+        room = (datacurr->ledata + LEDATA_MAX) - datacurr->lptr;
+    return room;
+}
+
+/*
+ * Return non-zero if the current record belongs to `segto' and its
+ * FIXUPP buffer has more than 8 bytes left; 0 otherwise.
+ */
+static int obj_fixup_free(struct Segment *segto) {
+    if (!datacurr || datacurr->seg != segto)
+        return 0;
+    return ((datacurr->fixupp + RECORD_MAX) - datacurr->fptr) > 8;
+}
+
+/*
+ * Start a fresh LEDATA/FIXUPP record pair for `segto', queue it at the
+ * tail of the data list and make it the current record. The LEDATA
+ * header (segment index followed by the start offset) is written
+ * straight away; 32-bit segments get the LEDATA+1 record type and a
+ * dword offset.
+ */
+static void obj_ledata_new(struct Segment *segto) {
+    struct ObjData *rec = nasm_malloc(sizeof(*rec));
+
+    /* link in at the tail and make current */
+    rec->next = NULL;
+    *datatail = rec;
+    datatail = &rec->next;
+    datacurr = rec;
+
+    rec->nonempty = FALSE;
+    rec->seg = segto;
+    rec->startpos = segto->currentpos;
+    rec->letype = (segto->use32 ? LEDATA+1 : LEDATA);
+    rec->ftype = FIXUPP;
+    rec->fptr = rec->fixupp;
+
+    /* record header: segment index, then offset of the first byte */
+    rec->lptr = obj_write_index (rec->ledata, segto->obj_index);
+    if (rec->letype != LEDATA)
+        rec->lptr = obj_write_dword (rec->lptr, segto->currentpos);
+    else
+        rec->lptr = obj_write_word (rec->lptr, segto->currentpos);
+}
+
+/*
+ * Close the current LEDATA record: with datacurr cleared,
+ * obj_ledata_space() reports no room and the next output starts a new
+ * record. The record itself stays queued on the data list.
+ */
+static void obj_ledata_commit(void) {
+ datacurr = NULL;
+}
+
+/*
+ * Fetch the external index stored by obj_deflabel() in ExtBack slot
+ * segval/2. Returns TRUE and sets *extidx if the block holding that
+ * slot exists, FALSE (leaving *extidx alone) if the chain is too short.
+ * Slots are numbered 0..EXT_BLKSIZ-1 within each block.
+ */
+static int obj_extback_find (long segval, long *extidx) {
+    long i = segval/2;
+    struct ExtBack *eb = ebhead;
+
+    while (eb && i >= EXT_BLKSIZ) {
+        eb = eb->next;
+        i -= EXT_BLKSIZ;
+    }
+    if (!eb)
+        return FALSE;
+    *extidx = eb->index[i];
+    return TRUE;
+}
+
+/*
+ * Append one fixup to the FIXUPP buffer of `data'.
+ *
+ *   bytes   width of the field being fixed up; 2 selects a 16-bit
+ *           offset, anything else a 32-bit offset (and the FIXUPP+1
+ *           record type)
+ *   segrel  0x4000 for a segment-relative fixup, 0 for self-relative
+ *   seg     NASM segment value of the target; an odd value refers to
+ *           the segment base of seg-1
+ *   wrt     NO_SEG for the default frame, otherwise the WRT value
+ *   offset  position of the field relative to the start of the record
+ */
+static void obj_write_fixup (struct ObjData *data, int bytes,
+                             int segrel, long seg, long wrt,
+                             long offset) {
+    int locat, method;
+    int base;
+    long tidx, fidx;
+    struct Segment *s = NULL;
+    struct Group *g = NULL;
+
+    locat = 0x8000 | segrel | offset;
+    if (seg % 2) {
+        base = TRUE;
+        locat |= 0x800;
+        seg--;
+        if (bytes != 2)
+            error(ERR_NONFATAL, "OBJ format can only handle 2-byte"
+                  " segment base references");
+    } else {
+        base = FALSE;
+        if (bytes == 2)
+            locat |= 0x400;
+        else {
+            locat |= 0x2400;
+            data->ftype = FIXUPP+1;    /* need new-style FIXUPP record */
+        }
+    }
+    data->fptr = obj_write_rword (data->fptr, locat);
+
+    tidx = fidx = -1, method = 0;      /* placate optimisers */
+
+    /*
+     * See if we can find the segment ID in our segment list. If
+     * so, we have a T4 (LSEG) target.
+     */
+    for (s = seghead; s; s = s->next)
+        if (s->index == seg)
+            break;
+    if (s)
+        method = 4, tidx = s->obj_index;
+    else {
+        /* next try the groups (T5), and finally the externals (T6) */
+        for (g = grphead; g; g = g->next)
+            if (g->index == seg)
+                break;
+        if (g)
+            method = 5, tidx = g->obj_index;
+        else if (obj_extback_find (seg, &tidx))
+            method = 6;
+        else
+            error(ERR_PANIC,
+                  "unrecognised segment value in obj_write_fixup");
+    }
+
+    /*
+     * If no WRT given, assume the natural default, which is method
+     * F5 unless we are doing an OFFSET fixup for a grouped
+     * segment, in which case we require F1 (group).
+     */
+    if (wrt == NO_SEG) {
+        if (!base && s && s->grp)
+            method |= 0x10, fidx = s->grp->obj_index;
+        else
+            method |= 0x50, fidx = -1;
+    } else {
+        /*
+         * See if we can find the WRT-segment ID in our segment
+         * list. If so, we have a F0 (LSEG) frame.
+         */
+        for (s = seghead; s; s = s->next)
+            if (s->index == wrt-1)
+                break;
+        if (s)
+            method |= 0x00, fidx = s->obj_index;
+        else {
+            for (g = grphead; g; g = g->next)
+                if (g->index == wrt-1)
+                    break;
+            if (g)
+                method |= 0x10, fidx = g->obj_index;
+            else if (obj_extback_find (wrt, &fidx))
+                method |= 0x20;
+            else
+                error(ERR_PANIC,
+                      "unrecognised WRT value in obj_write_fixup");
+        }
+    }
+
+    /* fix data byte, then the frame index (if any) and target index */
+    data->fptr = obj_write_byte (data->fptr, method);
+    if (fidx != -1)
+        data->fptr = obj_write_index (data->fptr, fidx);
+    data->fptr = obj_write_index (data->fptr, tidx);
+}
+
+/*
+ * Segment hook: look up the segment named in `name', creating it if it
+ * does not exist yet.
+ *
+ *   name  segment name optionally followed by whitespace-separated
+ *         attributes; the buffer is modified in place (separators are
+ *         overwritten with NULs). NULL asks for the default segment.
+ *   pass  assembler pass number; the redeclaration warning is only
+ *         issued on pass 1
+ *   bits  receives 16 or 32 according to the segment's USE16/USE32
+ *
+ * Returns the NASM segment id.
+ */
+static long obj_segment (char *name, int pass, int *bits) {
+    /*
+     * We call the label manager here to define a name for the new
+     * segment, and when our _own_ label-definition stub gets
+     * called in return, it should register the new segment name
+     * using the pointer it gets passed. That way we save memory,
+     * by sponging off the label manager.
+     */
+    if (!name) {
+        *bits = 16;
+        return first_seg;
+    } else {
+        struct Segment *seg;
+        struct Group *grp;
+        int obj_idx, i, attrs, rn_error;
+        char *p;
+
+        /*
+         * Look for segment attributes. The <ctype.h> functions need
+         * an unsigned char value, hence the casts.
+         */
+        attrs = 0;
+        p = name;
+        while (*p && !isspace((unsigned char) *p))
+            p++;
+        if (*p) {
+            *p++ = '\0';
+            while (*p && isspace((unsigned char) *p))
+                *p++ = '\0';
+        }
+        while (*p) {
+            while (*p && !isspace((unsigned char) *p))
+                p++;
+            if (*p) {
+                *p++ = '\0';
+                while (*p && isspace((unsigned char) *p))
+                    *p++ = '\0';
+            }
+
+            attrs++;
+        }
+
+        /* an existing segment of this name is simply returned */
+        obj_idx = 1;
+        for (seg = seghead; seg; seg = seg->next) {
+            obj_idx++;
+            if (!strcmp(seg->name, name)) {
+                if (attrs > 0 && pass == 1)
+                    error(ERR_WARNING, "segment attributes specified on"
+                          " redeclaration of segment: ignoring");
+                if (seg->use32)
+                    *bits = 32;
+                else
+                    *bits = 16;
+                return seg->index;
+            }
+        }
+
+        *segtail = seg = nasm_malloc(sizeof(*seg));
+        seg->next = NULL;
+        segtail = &seg->next;
+        seg->index = (any_segs ? seg_alloc() : first_seg);
+        seg->obj_index = obj_idx;
+        seg->grp = NULL;
+        any_segs = TRUE;
+        seg->name = NULL;
+        seg->currentpos = 0;
+        seg->align = 1;                /* default */
+        seg->use32 = FALSE;            /* default */
+        seg->combine = CMB_PUBLIC;     /* default */
+        seg->segclass = seg->overlay = NULL;
+        seg->pubhead = NULL;
+        seg->pubtail = &seg->pubhead;
+
+        /*
+         * Process the segment attributes.
+         */
+        p = name;
+        while (attrs--) {
+            /* step over the previous word and the NULs that follow it */
+            p += strlen(p);
+            while (!*p) p++;
+
+            /*
+             * `p' contains a segment attribute.
+             */
+            if (!nasm_stricmp(p, "private"))
+                seg->combine = CMB_PRIVATE;
+            else if (!nasm_stricmp(p, "public"))
+                seg->combine = CMB_PUBLIC;
+            else if (!nasm_stricmp(p, "common"))
+                seg->combine = CMB_COMMON;
+            else if (!nasm_stricmp(p, "stack"))
+                seg->combine = CMB_STACK;
+            else if (!nasm_stricmp(p, "use16"))
+                seg->use32 = FALSE;
+            else if (!nasm_stricmp(p, "use32"))
+                seg->use32 = TRUE;
+            else if (!nasm_strnicmp(p, "class=", 6))
+                seg->segclass = nasm_strdup(p+6);
+            else if (!nasm_strnicmp(p, "overlay=", 8))
+                seg->overlay = nasm_strdup(p+8);
+            else if (!nasm_strnicmp(p, "align=", 6)) {
+                seg->align = readnum(p+6, &rn_error);
+                if (rn_error) {
+                    seg->align = 1;
+                    error (ERR_NONFATAL, "segment alignment should be"
+                           " numeric");
+                }
+                switch ((int) seg->align) {
+                  case 1:              /* BYTE */
+                  case 2:              /* WORD */
+                  case 4:              /* DWORD */
+                  case 16:             /* PARA */
+                  case 256:            /* PAGE */
+                    break;
+                  case 8:
+                    error(ERR_WARNING, "OBJ format does not support alignment"
+                          " of 8: rounding up to 16");
+                    seg->align = 16;
+                    break;
+                  case 32:
+                  case 64:
+                  case 128:
+                    /* seg->align is a long, so it needs %ld */
+                    error(ERR_WARNING, "OBJ format does not support alignment"
+                          " of %ld: rounding up to 256", seg->align);
+                    seg->align = 256;
+                    break;
+                  default:
+                    error(ERR_NONFATAL, "invalid alignment value %ld",
+                          seg->align);
+                    seg->align = 1;
+                    break;
+                }
+            } else if (!nasm_strnicmp(p, "absolute=", 9)) {
+                seg->align = SEG_ABS + readnum(p+9, &rn_error);
+                if (rn_error)
+                    error (ERR_NONFATAL, "argument to `absolute' segment"
+                           " attribute should be numeric");
+            }
+        }
+
+        /*
+         * Define the segment name as a label; obj_deflabel() sees
+         * obj_seg_needs_update and stores the name pointer in `seg'.
+         */
+        obj_seg_needs_update = seg;
+        if (seg->align >= SEG_ABS)
+            deflabel (name, NO_SEG, seg->align - SEG_ABS, &of_obj, error);
+        else
+            deflabel (name, seg->index+1, 0L, &of_obj, error);
+        obj_seg_needs_update = NULL;
+
+        /*
+         * See if this segment is defined in any groups.
+         */
+        for (grp = grphead; grp; grp = grp->next) {
+            for (i = grp->nindices; i < grp->nentries; i++) {
+                if (!strcmp(grp->segs[i].name, seg->name)) {
+                    /* turn the pending name into an index entry, moving
+                     * the displaced name into the slot just vacated */
+                    nasm_free (grp->segs[i].name);
+                    grp->segs[i] = grp->segs[grp->nindices];
+                    grp->segs[grp->nindices++].index = seg->obj_index;
+                    if (seg->grp)
+                        error(ERR_WARNING, "segment `%s' is already part of"
+                              " a group: first one takes precedence",
+                              seg->name);
+                    else
+                        seg->grp = grp;
+                }
+            }
+        }
+
+        if (seg->use32)
+            *bits = 32;
+        else
+            *bits = 16;
+        return seg->index;
+    }
+}
+
+/*
+ * Directive hook for the format-specific directives `group' and
+ * `uppercase'.
+ *
+ *   directive  directive name
+ *   value      its argument text; modified in place for `group'
+ *   pass       assembler pass number; groups are only built on pass 1
+ *
+ * Returns 1 if the directive was recognised, 0 otherwise.
+ */
+static int obj_directive (char *directive, char *value, int pass) {
+    if (!strcmp(directive, "group")) {
+        char *p, *q;
+        if (pass == 1) {
+            struct Group *grp;
+            struct Segment *seg;
+            int obj_idx;
+
+            /* split off the group name; q is left at the first member.
+             * <ctype.h> functions need an unsigned char value. */
+            q = value;
+            while (*q && !isspace((unsigned char) *q))
+                q++;
+            if (isspace((unsigned char) *q)) {
+                *q++ = '\0';
+                while (*q && isspace((unsigned char) *q))
+                    q++;
+            }
+            if (!*q) {
+                error(ERR_NONFATAL, "GROUP directive contains no segments");
+                return 1;
+            }
+
+            obj_idx = 1;
+            for (grp = grphead; grp; grp = grp->next) {
+                obj_idx++;
+                if (!strcmp(grp->name, value)) {
+                    error(ERR_NONFATAL, "group `%s' defined twice", value);
+                    return 1;
+                }
+            }
+
+            *grptail = grp = nasm_malloc(sizeof(*grp));
+            grp->next = NULL;
+            grptail = &grp->next;
+            grp->index = seg_alloc();
+            grp->obj_index = obj_idx;
+            grp->nindices = grp->nentries = 0;
+            grp->name = NULL;
+
+            /*
+             * Define the group name as a label; obj_deflabel() sees
+             * obj_grp_needs_update and stores the name pointer.
+             */
+            obj_grp_needs_update = grp;
+            deflabel (value, grp->index+1, 0L, &of_obj, error);
+            obj_grp_needs_update = NULL;
+
+            while (*q) {
+                p = q;
+                while (*q && !isspace((unsigned char) *q))
+                    q++;
+                if (isspace((unsigned char) *q)) {
+                    *q++ = '\0';
+                    while (*q && isspace((unsigned char) *q))
+                        q++;
+                }
+                /*
+                 * The member array has a fixed capacity (GROUP_MAX
+                 * entries); refuse to write past its end.
+                 */
+                if (grp->nentries >=
+                    (long) (sizeof(grp->segs) / sizeof(grp->segs[0]))) {
+                    error(ERR_NONFATAL, "too many segments in group `%s'",
+                          value);
+                    break;
+                }
+                /*
+                 * Now p contains a segment name. Find it.
+                 */
+                for (seg = seghead; seg; seg = seg->next)
+                    if (!strcmp(seg->name, p))
+                        break;
+                if (seg) {
+                    /*
+                     * We have a segment index. Shift a name entry
+                     * to the end of the array to make room.
+                     */
+                    grp->segs[grp->nentries++] = grp->segs[grp->nindices];
+                    grp->segs[grp->nindices++].index = seg->obj_index;
+                    if (seg->grp)
+                        error(ERR_WARNING, "segment `%s' is already part of"
+                              " a group: first one takes precedence",
+                              seg->name);
+                    else
+                        seg->grp = grp;
+                } else {
+                    /*
+                     * We have an as-yet undefined segment.
+                     * Remember its name, for later.
+                     */
+                    grp->segs[grp->nentries++].name = nasm_strdup(p);
+                }
+            }
+        }
+        return 1;
+    }
+    if (!strcmp(directive, "uppercase")) {
+        obj_uppercase = TRUE;
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Segment-base hook. `segment' is a segment-base value (segment id
+ * plus one). For one of our own segments the result is the absolute
+ * address value if the segment is absolute, or the base of its group
+ * if it has one; in every other case the argument is returned as is.
+ */
+static long obj_segbase (long segment) {
+    struct Segment *s = seghead;
+
+    /* locate the segment whose id is segment-1, if it is ours */
+    while (s && s->index != segment-1)
+        s = s->next;
+
+    if (s) {
+        if (s->align >= SEG_ABS)
+            return s->align;           /* absolute segment */
+        if (s->grp)
+            return s->grp->index+1;    /* grouped segment */
+    }
+
+    /* not one of ours, or nothing special about it */
+    return segment;
+}
+
+/*
+ * File-name hook: remember the input file name (it becomes the THEADR
+ * module name) and derive the output name with a ".obj" extension.
+ * The copy is bounded so that an over-long input name is truncated
+ * rather than written past the end of obj_infile.
+ */
+static void obj_filename (char *inname, char *outname, efunc error) {
+    strncpy(obj_infile, inname, sizeof(obj_infile) - 1);
+    obj_infile[sizeof(obj_infile) - 1] = '\0';
+    standard_extension (inname, outname, ".obj", error);
+}
+
+/*
+ * Write the whole object file from the structures built up during
+ * assembly. Records are produced in this order: THEADR, a COMENT naming
+ * the assembler, LNAMES/SEGDEF for each segment, LNAMES and GRPDEF for
+ * the groups, PUBDEF, EXTDEF/COMDEF, the link-pass-separator COMENT,
+ * the LEDATA/FIXUPP pairs and finally MODEND.
+ *
+ * Every record except LEDATA/FIXUPP is assembled in the shared
+ * `record' buffer; multi-entry records are flushed before an entry
+ * that would not fit is added, so the buffer is never overrun.
+ */
+static void obj_write_file (void) {
+    struct Segment *seg;
+    struct Group *grp;
+    struct Public *pub;
+    struct External *ext;
+    struct ObjData *data;
+    static unsigned char boast[] = "The Netwide Assembler " NASM_VER;
+    int lname_idx, rectype;
+
+    /*
+     * Write the THEADR module header.
+     */
+    recptr = record;
+    recptr = obj_write_name (recptr, obj_infile);
+    obj_record (THEADR, record, recptr);
+
+    /*
+     * Write the NASM boast comment.
+     */
+    recptr = record;
+    recptr = obj_write_rword (recptr, 0);   /* comment type zero */
+    recptr = obj_write_data (recptr, boast, sizeof(boast)-1);
+    obj_record (COMENT, record, recptr);
+
+    /*
+     * Write the first LNAMES record, containing LNAME one, which
+     * is null. Also initialise the LNAME counter.
+     */
+    recptr = record;
+    recptr = obj_write_name (recptr, "");
+    obj_record (LNAMES, record, recptr);
+    lname_idx = 2;
+
+    /*
+     * Write the SEGDEF records. Each has an associated LNAMES
+     * record.
+     */
+    for (seg = seghead; seg; seg = seg->next) {
+        int new_segdef;                /* do we use the newer record type? */
+        int acbp;
+        int sn, cn, on;                /* seg, class, overlay LNAME idx */
+
+        if (seg->use32 || seg->currentpos >= 0x10000)
+            new_segdef = TRUE;
+        else
+            new_segdef = FALSE;
+
+        recptr = record;
+        recptr = obj_write_name (recptr, seg->name);
+        sn = lname_idx++;
+        if (seg->segclass) {
+            recptr = obj_write_name (recptr, seg->segclass);
+            cn = lname_idx++;
+        } else
+            cn = 1;
+        if (seg->overlay) {
+            recptr = obj_write_name (recptr, seg->overlay);
+            on = lname_idx++;
+        } else
+            on = 1;
+        obj_record (LNAMES, record, recptr);
+
+        acbp = (seg->combine << 2);    /* C field */
+
+        /*
+         * The B bit is never needed: any segment of 0x10000 bytes or
+         * more uses the new-style record, whose length field is wide
+         * enough to hold the real size.
+         */
+
+        if (seg->use32)
+            acbp |= 0x01;              /* P bit is Use32 flag */
+
+        /* A field */
+        if (seg->align >= SEG_ABS)
+            acbp |= 0x00;
+        else if (seg->align >= 256) {
+            if (seg->align > 256)
+                error(ERR_NONFATAL, "segment `%s' requires more alignment"
+                      " than OBJ format supports", seg->name);
+            acbp |= 0x80;
+        } else if (seg->align >= 16) {
+            acbp |= 0x60;
+        } else if (seg->align >= 4) {
+            acbp |= 0xA0;
+        } else if (seg->align >= 2) {
+            acbp |= 0x40;
+        } else
+            acbp |= 0x20;
+
+        recptr = record;
+        recptr = obj_write_byte (recptr, acbp);
+        if (seg->align & SEG_ABS) {
+            /* absolute segment: frame number and offset byte */
+            recptr = obj_write_word (recptr, seg->align - SEG_ABS);
+            recptr = obj_write_byte (recptr, 0);
+        }
+        if (new_segdef)
+            recptr = obj_write_dword (recptr, seg->currentpos);
+        else
+            recptr = obj_write_word (recptr, seg->currentpos & 0xFFFF);
+        recptr = obj_write_index (recptr, sn);
+        recptr = obj_write_index (recptr, cn);
+        recptr = obj_write_index (recptr, on);
+        if (new_segdef)
+            obj_record (SEGDEF+1, record, recptr);
+        else
+            obj_record (SEGDEF, record, recptr);
+    }
+
+    /*
+     * Write some LNAMES for the group names. lname_idx is left
+     * alone here - it will catch up when we write the GRPDEFs.
+     */
+    recptr = record;
+    for (grp = grphead; grp; grp = grp->next) {
+        /* flush first if the name plus its length byte would not fit */
+        if (recptr > record &&
+            (size_t) (recptr - record) + strlen(grp->name) + 1
+            > sizeof(record)) {
+            obj_record (LNAMES, record, recptr);
+            recptr = record;
+        }
+        recptr = obj_write_name (recptr, grp->name);
+    }
+    if (recptr > record)
+        obj_record (LNAMES, record, recptr);
+
+    /*
+     * Write the GRPDEF records.
+     */
+    for (grp = grphead; grp; grp = grp->next) {
+        int i;
+
+        if (grp->nindices != grp->nentries) {
+            for (i = grp->nindices; i < grp->nentries; i++) {
+                error(ERR_NONFATAL, "group `%s' contains undefined segment"
+                      " `%s'", grp->name, grp->segs[i].name);
+                nasm_free (grp->segs[i].name);
+                grp->segs[i].name = NULL;
+            }
+        }
+        recptr = record;
+        recptr = obj_write_index (recptr, lname_idx++);
+        for (i = 0; i < grp->nindices; i++) {
+            recptr = obj_write_byte (recptr, 0xFF);
+            recptr = obj_write_index (recptr, grp->segs[i].index);
+        }
+        obj_record (GRPDEF, record, recptr);
+    }
+
+    /*
+     * Write the PUBDEF records: first the ones in the segments,
+     * then the far-absolutes.
+     */
+    for (seg = seghead; seg; seg = seg->next) {
+        int any;
+
+        recptr = record;
+        recptr = obj_write_index (recptr, seg->grp ? seg->grp->obj_index : 0);
+        recptr = obj_write_index (recptr, seg->obj_index);
+        any = FALSE;
+        if (seg->use32)
+            rectype = PUBDEF+1;
+        else
+            rectype = PUBDEF;
+        for (pub = seg->pubhead; pub; pub = pub->next) {
+            /*
+             * An entry is the name, its length byte, an offset of up
+             * to four bytes and a one-byte type index. If it will not
+             * fit, flush and start a continuation record with the
+             * same group and segment indices.
+             */
+            if ((size_t) (recptr - record) + strlen(pub->name) + 6
+                > sizeof(record)) {
+                if (any)
+                    obj_record (rectype, record, recptr);
+                recptr = record;
+                recptr = obj_write_index (recptr,
+                                          seg->grp ? seg->grp->obj_index : 0);
+                recptr = obj_write_index (recptr, seg->obj_index);
+                any = FALSE;
+            }
+            recptr = obj_write_name (recptr, pub->name);
+            if (seg->use32)
+                recptr = obj_write_dword (recptr, pub->offset);
+            else
+                recptr = obj_write_word (recptr, pub->offset);
+            recptr = obj_write_index (recptr, 0);
+            any = TRUE;
+        }
+        if (any)
+            obj_record (rectype, record, recptr);
+    }
+    for (pub = fpubhead; pub; pub = pub->next) {   /* pub-crawl :-) */
+        recptr = record;
+        recptr = obj_write_index (recptr, 0);   /* no group */
+        recptr = obj_write_index (recptr, 0);   /* no segment either */
+        recptr = obj_write_word (recptr, pub->segment);
+        recptr = obj_write_name (recptr, pub->name);
+        recptr = obj_write_word (recptr, pub->offset);
+        recptr = obj_write_index (recptr, 0);
+        obj_record (PUBDEF, record, recptr);
+    }
+
+    /*
+     * Write the EXTDEF and COMDEF records, in order.
+     */
+    recptr = record;
+    for (ext = exthead; ext; ext = ext->next) {
+        if (ext->commonsize == 0) {
+            /* flush first if name + length byte + type index won't fit */
+            if (recptr > record &&
+                (size_t) (recptr - record) + strlen(ext->name) + 2
+                > sizeof(record)) {
+                obj_record (EXTDEF, record, recptr);
+                recptr = record;
+            }
+            recptr = obj_write_name (recptr, ext->name);
+            recptr = obj_write_index (recptr, 0);
+        } else {
+            /* a COMDEF interrupts the run of EXTDEF entries: flush
+             * them first so that external indices stay in order */
+            if (recptr > record)
+                obj_record (EXTDEF, record, recptr);
+            recptr = record;
+            if (ext->commonsize > 0) {
+                recptr = obj_write_name (recptr, ext->name);
+                recptr = obj_write_index (recptr, 0);
+                recptr = obj_write_byte (recptr, 0x61);/* far communal */
+                recptr = obj_write_value (recptr, 1L);
+                recptr = obj_write_value (recptr, ext->commonsize);
+                obj_record (COMDEF, record, recptr);
+            } else if (ext->commonsize < 0) {
+                /* NOTE(review): the negative size is passed on as is
+                 * and so is encoded as a large unsigned value - confirm
+                 * whether -commonsize was intended. */
+                recptr = obj_write_name (recptr, ext->name);
+                recptr = obj_write_index (recptr, 0);
+                recptr = obj_write_byte (recptr, 0x62);/* near communal */
+                recptr = obj_write_value (recptr, ext->commonsize);
+                obj_record (COMDEF, record, recptr);
+            }
+            recptr = record;
+        }
+    }
+    if (recptr > record)
+        obj_record (EXTDEF, record, recptr);
+
+    /*
+     * Write a COMENT record stating that the linker's first pass
+     * may stop processing at this point.
+     */
+    recptr = record;
+    recptr = obj_write_rword (recptr, 0x40A2);
+    recptr = obj_write_byte (recptr, 1);
+    obj_record (COMENT, record, recptr);
+
+    /*
+     * Write the LEDATA/FIXUPP pairs. The FIXUPP record type comes
+     * from the record itself: obj_write_fixup() switches it to the
+     * new-style FIXUPP+1 when a 32-bit offset fixup is present.
+     */
+    for (data = datahead; data; data = data->next) {
+        if (data->nonempty) {
+            obj_record (data->letype, data->ledata, data->lptr);
+            if (data->fptr != data->fixupp)
+                obj_record (data->ftype, data->fixupp, data->fptr);
+        }
+    }
+
+    /*
+     * Write the MODEND module end marker.
+     */
+    recptr = record;
+    rectype = MODEND;
+    if (obj_entry_seg != NO_SEG) {
+        recptr = obj_write_byte (recptr, 0xC1);
+        /*
+         * Find the segment in the segment list.
+         */
+        for (seg = seghead; seg; seg = seg->next) {
+            if (seg->index == obj_entry_seg) {
+                if (seg->grp) {
+                    recptr = obj_write_byte (recptr, 0x10);
+                    recptr = obj_write_index (recptr, seg->grp->obj_index);
+                } else {
+                    recptr = obj_write_byte (recptr, 0x50);
+                }
+                recptr = obj_write_index (recptr, seg->obj_index);
+                if (seg->use32) {
+                    rectype = MODEND+1;
+                    recptr = obj_write_dword (recptr, obj_entry_ofs);
+                } else
+                    recptr = obj_write_word (recptr, obj_entry_ofs);
+                break;
+            }
+        }
+        if (!seg)
+            error(ERR_NONFATAL, "entry point is not in this module");
+    } else
+        recptr = obj_write_byte (recptr, 0);
+    obj_record (rectype, record, recptr);
+}
+
+/*
+ * Copy `len' bytes from `data' to `ptr' and return the position just
+ * past the last byte written.
+ */
+static unsigned char *obj_write_data(unsigned char *ptr,
+                                     unsigned char *data, int len) {
+    int i;
+
+    for (i = 0; i < len; i++)
+        ptr[i] = data[i];
+    return ptr + i;
+}
+
+/*
+ * Store the low byte of `data' at `ptr' and return the next position.
+ */
+static unsigned char *obj_write_byte(unsigned char *ptr, int data) {
+    ptr[0] = data;
+    return ptr + 1;
+}
+
+/*
+ * Store the low 16 bits of `data' at `ptr', least significant byte
+ * first, and return the next position.
+ */
+static unsigned char *obj_write_word(unsigned char *ptr, int data) {
+    ptr[0] = data & 0xFF;
+    ptr[1] = (data >> 8) & 0xFF;
+    return ptr + 2;
+}
+
+/*
+ * Store the low 32 bits of `data' at `ptr', least significant byte
+ * first, and return the next position.
+ */
+static unsigned char *obj_write_dword(unsigned char *ptr, long data) {
+    int shift;
+
+    for (shift = 0; shift < 32; shift += 8)
+        *ptr++ = (data >> shift) & 0xFF;
+    return ptr;
+}
+
+/*
+ * Store the low 16 bits of `data' at `ptr' in reversed order, i.e. most
+ * significant byte first, and return the next position.
+ */
+static unsigned char *obj_write_rword(unsigned char *ptr, int data) {
+    ptr[0] = (data >> 8) & 0xFF;
+    ptr[1] = data & 0xFF;
+    return ptr + 2;
+}
+
+/*
+ * Write `data' at `ptr' as a length-prefixed name: one length byte
+ * followed by the characters, upper-cased if the `uppercase' directive
+ * was given. Returns the position after the last character.
+ *
+ * The length prefix is a single byte, so at most 255 characters are
+ * written; a longer name is truncated. Previously the length byte
+ * wrapped round while every character was still written, leaving a
+ * record whose contents disagreed with its own length field.
+ */
+static unsigned char *obj_write_name(unsigned char *ptr, char *data) {
+    size_t len = strlen(data);
+    size_t i;
+
+    if (len > 255)
+        len = 255;
+    *ptr++ = (unsigned char) len;
+    if (obj_uppercase) {
+        /* toupper() needs an unsigned char value, not a plain char */
+        for (i = 0; i < len; i++)
+            *ptr++ = (unsigned char) toupper((unsigned char) data[i]);
+    } else {
+        for (i = 0; i < len; i++)
+            *ptr++ = (unsigned char) data[i];
+    }
+    return ptr;
+}
+
+/*
+ * Write an index field at `ptr' and return the next position. Values
+ * below 128 take one byte; anything else takes two bytes, the first
+ * carrying the top bit plus bits 8-14 and the second the low 8 bits.
+ */
+static unsigned char *obj_write_index(unsigned char *ptr, int data) {
+    if (data < 128) {
+        ptr[0] = data;
+        return ptr + 1;
+    }
+    ptr[0] = 0x80 | ((data >> 8) & 0x7F);
+    ptr[1] = data & 0xFF;
+    return ptr + 2;
+}
+
+/*
+ * Write a variable-length value at `ptr' and return the next position.
+ * Values up to 128 are a single byte. Larger ones are a marker byte
+ * (129, 132 or 136) followed by the value in 2, 3 or 4 bytes, least
+ * significant byte first.
+ */
+static unsigned char *obj_write_value(unsigned char *ptr,
+                                      unsigned long data) {
+    int marker, nbytes;
+
+    if (data <= 128) {
+        *ptr++ = data;
+        return ptr;
+    }
+
+    if (data <= 0xFFFF)
+        marker = 129, nbytes = 2;
+    else if (data <= 0xFFFFFF)
+        marker = 132, nbytes = 3;
+    else
+        marker = 136, nbytes = 4;
+
+    *ptr++ = marker;
+    while (nbytes-- > 0) {
+        *ptr++ = data & 0xFF;
+        data >>= 8;
+    }
+    return ptr;
+}
+
+/*
+ * Write one record to the output file: the type byte, a 16-bit length
+ * (contents plus the checksum byte), the contents from `start' up to
+ * but not including `end', and a checksum byte chosen so that all the
+ * bytes of the record sum to zero modulo 256.
+ */
+static void obj_record(int type, unsigned char *start, unsigned char *end) {
+    unsigned long len = end-start+1;
+    unsigned long cksum;
+    unsigned char *p;
+
+    /* checksum starts with the header bytes */
+    cksum = type;
+    cksum += (len & 0xFF) + ((len>>8) & 0xFF);
+
+    fputc (type, ofp);
+    fwriteshort (len, ofp);
+    fwrite (start, 1, end-start, ofp);
+
+    for (p = start; p < end; p++)
+        cksum += *p;
+    fputc ( (-cksum) & 0xFF, ofp);
+}
+
+/*
+ * Driver descriptor for the OBJ output format: a description, the short
+ * format name, and this file's entry points.
+ * NOTE(review): the initialiser order has to match the member order of
+ * struct ofmt, which is declared outside this file - confirm there.
+ */
+struct ofmt of_obj = {
+ "Microsoft MS-DOS 16-bit object files",
+ "obj",
+ obj_init,
+ obj_out,
+ obj_deflabel,
+ obj_segment,
+ obj_segbase,
+ obj_directive,
+ obj_filename,
+ obj_cleanup
+};
+
+#endif /* OF_OBJ */
diff --git a/outrdf.c b/outrdf.c
new file mode 100644
index 0000000..24fd480
--- /dev/null
+++ b/outrdf.c
@@ -0,0 +1,467 @@
+/* outrdf.c output routines for the Netwide Assembler to produce
+ * RDOFF format object files (which are intended mainly
+ * for use in proprietary projects, as the code to load and
+ * execute them is very simple). They will also be used
+ * for device drivers and possibly some executable files
+ * in the MOSCOW operating system. See Rdoff.txt for
+ * details.
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_RDF
+
+typedef short int16; /* not sure if this will be required to be altered
+ at all... best to typedef it just in case */
+
+const char *RDOFFId = "RDOFF1"; /* written to the start of RDOFF files */
+
+/* the records that can be found in the RDOFF header */
+
+/* Note that whenever a segment is referred to in the RDOFF file, its number
+ * is always half of the segment number that NASM uses to refer to it; this
+ * is because NASM only allocates even numbered segments, so as to not
+ * waste any of the 16 bits of segment number written to the file - this
+ * allows up to 65533 external labels to be defined; otherwise it would be
+ * 32764. */
+
+struct RelocRec { /* relocation record; write_reloc_rec() serialises it
+ * as 9 bytes in field order: type, segment, offset
+ * (4 bytes, little-endian), length, refseg (2 bytes,
+ * little-endian) */
+ char type; /* must be 1 */
+ char segment; /* only 0 for code, or 1 for data supported,
+ * but add 64 for relative refs (ie do not require
+ * reloc @ loadtime, only linkage) */
+ long offset; /* from start of segment in which reference is loc'd */
+ char length; /* 1 2 or 4 bytes */
+ int16 refseg; /* segment to which reference refers; filled in with
+ * the NASM segment number and halved by
+ * write_reloc_rec() before it is written */
+};
+
+struct ImportRec { /* import (EXTERN) record; write_import_rec() emits
+ * type, then segment as 2 little-endian bytes, then
+ * the label including its terminating zero */
+ char type; /* must be 2 */
+ int16 segment; /* segment number allocated to the label for reloc
+ * records - label is assumed to be at offset zero
+ * in this segment, so linker must fix up with offset
+ * of segment and of offset within segment.
+ * Halved by write_import_rec() before writing. */
+ char label[33]; /* zero terminated; written to the file up to and
+ * including the zero, but not after it - max len =
+ * 32 chars */
+};
+
+struct ExportRec { /* export (GLOBAL) record; write_export_rec() emits
+ * type, segment, offset as 4 little-endian bytes,
+ * then the label including its terminating zero */
+ char type; /* must be 3 */
+ char segment; /* segment referred to (0/1); filled in with the NASM
+ * segment number and halved by write_export_rec() */
+ long offset; /* offset within segment */
+ char label[33]; /* zero terminated as above. max len = 32 chars */
+};
+
+struct DLLRec { /* library-reference record; nothing in outrdf.c writes
+ * one of these */
+ char type; /* must be 4 */
+ char libname[128]; /* name of library to link with at load time */
+};
+
+struct BSSRec { /* BSS reservation record; write_bss_rec() emits type
+ * followed by amount as 4 little-endian bytes.
+ * rdf_cleanup() writes one when bsslength is non-zero. */
+ char type; /* must be 5 */
+ long amount; /* number of bytes BSS to reserve */
+};
+
+/* code for managing buffers needed to separate code and data into individual
+ * sections until they are ready to be written to the file.
+ * We'd better hope that it all fits in memory else we're buggered... */
+
+#define BUF_BLOCK_LEN 4088 /* selected to match page size (4096)
+ * on 80x86 machines for efficiency */
+
+typedef struct memorybuffer { /* one block in a singly linked chain of
+ * fixed-size output buffers */
+ int length; /* number of bytes of buffer[] currently in use */
+ char buffer[BUF_BLOCK_LEN];
+ struct memorybuffer *next; /* following block, or NULL for the last one */
+} memorybuffer;
+
+/* Allocate a fresh, empty buffer block with no successor. The storage
+ * comes from nasm_malloc(). */
+memorybuffer * newmembuf(){
+    memorybuffer *blk = nasm_malloc(sizeof(memorybuffer));
+
+    blk->next = NULL;
+    blk->length = 0;
+    return blk;
+}
+
+/*
+ * Append to the buffer chain headed by `b'. Data always goes into the
+ * last block of the chain; a new block is chained on whenever the
+ * current one has no room left.
+ *
+ * `bytes' selects the mode:
+ *    -4    `data' points to a long, whose low 32 bits are stored
+ *          little-endian
+ *    -2    `data' points to an int16, stored little-endian
+ *   > 0    that many raw bytes are copied from `data'
+ * Any other value writes nothing.
+ *
+ * A 2- or 4-byte value is never split across two blocks. Raw data
+ * simply continues in the next block, so a raw write may be of any
+ * size.
+ */
+void membufwrite(memorybuffer *b, void *data, int bytes) {
+    unsigned char *src = data;
+    int fixed = (bytes == -4 ? 4 : bytes == -2 ? 2 : 0);
+    int16 w;
+    long l;
+    int n;
+
+    /* only the last block of the chain is ever appended to */
+    while (b->next)
+        b = b->next;
+
+    if (fixed && b->length + fixed > BUF_BLOCK_LEN) {
+        /* No room for the whole value: it goes into a fresh block
+         * instead. From here on `b' must be the new block; writing to
+         * the old one as well would duplicate the data and run off the
+         * end of its buffer. */
+        b->next = newmembuf();
+        b = b->next;
+    }
+
+    switch (bytes) {
+      case -4:                         /* convert to little-endian */
+        l = * (long *) data;
+        for (n = 0; n < 4; n++) {
+            b->buffer[b->length++] = l & 0xFF;
+            l >>= 8;
+        }
+        break;
+
+      case -2:
+        w = * (int16 *) data;
+        b->buffer[b->length++] = w & 0xFF;
+        w >>= 8;
+        b->buffer[b->length++] = w & 0xFF;
+        break;
+
+      default:
+        while (bytes > 0) {
+            if (b->length == BUF_BLOCK_LEN) {
+                b->next = newmembuf();
+                b = b->next;
+            }
+            b->buffer[b->length++] = *src++;
+            bytes--;
+        }
+        break;
+    }
+}
+
+/* Write the used part of every block in the chain starting at `b' to
+ * `fp', in chain order. A NULL chain writes nothing. */
+void membufdump(memorybuffer *b,FILE *fp)
+{
+    for (; b; b = b->next)
+        fwrite (b->buffer, 1, b->length, fp);
+}
+
+/* Return the total number of bytes held in the chain starting at `b'
+ * (0 for a NULL chain). */
+int membuflength(memorybuffer *b)
+{
+    int total = 0;
+
+    while (b) {
+        total += b->length;
+        b = b->next;
+    }
+    return total;
+}
+
+/* Release every block of the chain starting at `b' with nasm_free().
+ * A NULL chain is accepted and ignored. */
+void freemembuf(memorybuffer *b)
+{
+    while (b) {
+        memorybuffer *rest = b->next;  /* fetch before the block goes */
+
+        nasm_free(b);
+        b = rest;
+    }
+}
+
+/***********************************************************************
+ * Actual code to deal with RDOFF output format begins here...
+ */
+
+/* global variables set during the initialisation phase */
+
+memorybuffer *seg[2]; /* seg 0 = code, seg 1 = data */
+memorybuffer *header; /* relocation/import/export records, plus the BSS
+ * record that rdf_cleanup() adds */
+
+FILE *ofile; /* output stream handed to rdf_init(); written and closed by
+ * rdf_cleanup() */
+
+int seg_warned; /* not referenced anywhere else in outrdf.c */
+static efunc error; /* error reporter handed to rdf_init() */
+
+int segtext,segdata,segbss; /* segment numbers obtained from seg_alloc() in
+ * rdf_init(), which insists on 0, 2 and 4 */
+long bsslength; /* running total of bytes reserved in the BSS segment */
+
+/*
+ * Set up the rdf driver: remember the output stream and the error
+ * reporter, create empty buffers for code, data and the header, and
+ * claim the three segment numbers. The rest of the driver hard-codes
+ * 0, 2 and 4 for .text, .data and .bss, so any other allocation is
+ * reported as a panic. `ldef' is not used.
+ */
+static void rdf_init(FILE *fp, efunc errfunc, ldfunc ldef)
+{
+    int k;
+
+    ofile = fp;
+    error = errfunc;
+    bsslength = 0;
+
+    for (k = 0; k < 2; k++)
+        seg[k] = newmembuf();
+    header = newmembuf();
+
+    segtext = seg_alloc();
+    segdata = seg_alloc();
+    segbss = seg_alloc();
+    if (!(segtext == 0 && segdata == 2 && segbss == 4))
+        error(ERR_PANIC,"rdf segment numbers not allocated as expected (%d,%d,%d)",
+              segtext,segdata,segbss);
+}
+
+/*
+ * Map a section name to its segment number: 0 for ".text", 2 for
+ * ".data", 4 for ".bss", NO_SEG for anything else. A NULL name asks
+ * for the default section: that is segment 0, and it is the only case
+ * in which *bits is written (set to 32). `pass' is not used.
+ */
+static long rdf_section_names(char *name, int pass, int *bits)
+{
+    if (!name) {
+        *bits = 32;                    /* default is 32 bits */
+        return 0;
+    }
+
+    if (strcmp(name, ".text") == 0)
+        return 0;
+    if (strcmp(name, ".data") == 0)
+        return 2;
+    if (strcmp(name, ".bss") == 0)
+        return 4;
+    return NO_SEG;
+}
+
+static void write_reloc_rec(struct RelocRec *r) /* append one relocation
+ * record to the header buffer. Note that *r itself is
+ * modified: refseg is halved in place. */
+{
+ r->refseg >>= 1; /* adjust segment nos to RDF rather than NASM */
+
+ membufwrite(header,&r->type,1);
+ membufwrite(header,&r->segment,1);
+ membufwrite(header,&r->offset,-4);
+ membufwrite(header,&r->length,1);
+ membufwrite(header,&r->refseg,-2); /* 9 bytes written */
+}
+
+static void write_export_rec(struct ExportRec *r) /* append one export
+ * record to the header buffer. Note that *r itself is
+ * modified: segment is halved in place. */
+{
+ r->segment >>= 1; /* NASM segment number -> RDF segment number */
+
+ membufwrite(header,&r->type,1);
+ membufwrite(header,&r->segment,1);
+ membufwrite(header,&r->offset,-4);
+ membufwrite(header,r->label,strlen(r->label) + 1); /* +1: the terminating
+ * zero is written too */
+}
+
+static void write_import_rec(struct ImportRec *r) /* append one import
+ * record to the header buffer. Note that *r itself is
+ * modified: segment is halved in place. */
+{
+ r->segment >>= 1; /* NASM segment number -> RDF segment number */
+
+ membufwrite(header,&r->type,1);
+ membufwrite(header,&r->segment,-2);
+ membufwrite(header,r->label,strlen(r->label) + 1); /* +1: the terminating
+ * zero is written too */
+}
+
+static void write_bss_rec(struct BSSRec *r) /* append one BSS record to the
+ * header buffer: the type byte, then the amount as 4
+ * little-endian bytes */
+{
+ membufwrite(header,&r->type,1);
+ membufwrite(header,&r->amount,-4);
+}
+
+/*
+ * Label-definition callback of the rdf driver.
+ *
+ * A label in a segment numbered above 4 (i.e. beyond .text, .data and
+ * .bss) is recorded as an import. If such a label was also declared
+ * global, a warning is issued first and no export is written for it.
+ * A global label in any other segment is recorded as an export. All
+ * other labels produce nothing. Names are cut to 32 characters.
+ */
+static void rdf_deflabel(char *name, long segment, long offset, int is_global)
+{
+    struct ExportRec exp;
+    struct ImportRec imp;
+
+    if (segment > 4) {                 /* EXTERN declaration */
+        if (is_global)
+            error(ERR_WARNING,"common declarations not supported... using extern");
+
+        imp.type = 2;
+        imp.segment = segment;
+        strncpy(imp.label,name,32);
+        imp.label[32] = 0;
+        write_import_rec(&imp);
+        return;
+    }
+
+    if (!is_global)
+        return;
+
+    exp.type = 3;
+    exp.segment = segment;
+    exp.offset = offset;
+    strncpy(exp.label,name,32);
+    exp.label[32] = 0;
+    write_export_rec(&exp);
+}
+
+/*
+ * Output callback of the rdf driver: emit one item of assembled output
+ * into segment `segto' (NASM numbering: 0 .text, 2 .data, 4 .bss).
+ *
+ * `type' holds the kind of item in its OUT_TYPMASK bits and a byte
+ * count in its OUT_SIZMASK bits. `data' points to the raw bytes for
+ * OUT_RAWDATA and to a long for the address kinds; it is not read for
+ * OUT_RESERVE. `segment' is the segment the value refers to, or NO_SEG
+ * when there is none. WRT is not supported by this format: a `wrt'
+ * other than NO_SEG is reported and then ignored.
+ *
+ * Code and data bytes are appended to seg[], relocation records go to
+ * the header buffer, and .bss only ever accumulates a size in
+ * bsslength.
+ */
+static void rdf_out (long segto, void *data, unsigned long type,
+                     long segment, long wrt)
+{
+    long bytes = type & OUT_SIZMASK;
+    struct RelocRec rr;
+    unsigned char databuf[4],*pd;
+    int16 rel16;
+
+    segto >>= 1;    /* convert NASM segment no to RDF number */
+
+    if (segto != 0 && segto != 1 && segto != 2) {
+        error(ERR_NONFATAL,"specified segment not supported by rdf output format");
+        return;
+    }
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by rdf output format");
+    }
+
+    type &= OUT_TYPMASK;
+
+    if (segto == 2 && type != OUT_RESERVE)
+    {
+        error(ERR_NONFATAL, "BSS segments may not be initialised");
+
+        /* Just reserve the space for now. For the relative kinds the
+         * size field is the distance to the end of the instruction, not
+         * the operand width, so the width is substituted; for every
+         * other kind the size field already is the number of bytes. */
+        if (type == OUT_REL2ADR)
+            bytes = 2;
+        else if (type == OUT_REL4ADR)
+            bytes = 4;
+        type = OUT_RESERVE;
+    }
+
+    if (type == OUT_RESERVE) {
+        if (segto == 2)                /* BSS segment space reserved */
+            bsslength += bytes;
+        else {
+            /* reserved space in code or data is filled with zero bytes,
+             * so the file contents do not depend on stack garbage */
+            databuf[0] = 0;
+            while (bytes --)
+                membufwrite(seg[segto],databuf,1);
+        }
+    }
+    else if (type == OUT_RAWDATA) {
+        if (segment != NO_SEG)
+            error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
+        membufwrite(seg[segto],data,bytes);
+    }
+    else if (type == OUT_ADDRESS) {
+
+        /* if segment == NO_SEG then we are writing an address of an
+           object within the same segment - do not produce reloc rec. */
+
+        if (segment != NO_SEG)
+        {
+
+            /* it's an address, so we must write a relocation record */
+
+            rr.type = 1;               /* type signature */
+            rr.segment = segto;        /* segment we're currently in */
+            rr.offset = membuflength(seg[segto]);  /* current offset */
+            rr.length = bytes;         /* length of reference */
+            rr.refseg = segment;       /* segment referred to */
+            write_reloc_rec(&rr);
+        }
+
+        pd = databuf;                  /* convert address to little-endian */
+        if (bytes == 2)
+            WRITESHORT (pd, *(long *)data);
+        else
+            WRITELONG (pd, *(long *)data);
+
+        membufwrite(seg[segto],databuf,bytes);
+
+    }
+    else if (type == OUT_REL2ADR)
+    {
+        /* NOTE(review): segto has been halved by this point and segment
+         * has not, so the two are in different numbering schemes; this
+         * check looks unreliable for anything but segment 0 -- confirm
+         * the intent before relying on it. */
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL2ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "rdf format does not support segment base refs");
+        }
+
+        rr.type = 1;                   /* type signature */
+        rr.segment = segto+64;         /* segment we're currently in + rel flag */
+        rr.offset = membuflength(seg[segto]);      /* current offset */
+        rr.length = 2;                 /* length of reference */
+        rr.refseg = segment;           /* segment referred to */
+        write_reloc_rec(&rr);
+
+        /* work out what to put in the code: offset of the end of this operand,
+         * subtracted from any data specified, so that loader can just add
+         * address of imported symbol onto it to get address relative to end of
+         * instruction: import_address + data(offset) - end_of_instrn */
+
+        rr.offset = *(long *)data -(rr.offset + bytes);
+
+        /* membufwrite() reads an int16 in this mode, so hand it a real
+         * int16 rather than the address of a long, whose first two
+         * bytes are the low half only on little-endian hosts */
+        rel16 = (int16) rr.offset;
+        membufwrite(seg[segto],&rel16,-2);
+    }
+    else if (type == OUT_REL4ADR)
+    {
+        /* NOTE(review): same numbering mismatch as in the REL2ADR case */
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL4ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "rdf format does not support segment base refs");
+        }
+
+        rr.type = 1;                   /* type signature */
+        rr.segment = segto+64;         /* segment we're currently in + rel tag */
+        rr.offset = membuflength(seg[segto]);      /* current offset */
+        rr.length = 4;                 /* length of reference */
+        rr.refseg = segment;           /* segment referred to */
+        write_reloc_rec(&rr);
+
+        rr.offset = *(long *)data -(rr.offset + bytes);
+        membufwrite(seg[segto],&rr.offset,-4);
+    }
+}
+
+/* Write the total length of chain `m' to the output file as four bytes
+ * produced by WRITELONG, followed by the buffered bytes themselves. */
+static void rdf_dump_chunk (memorybuffer *m) {
+    unsigned char lenbuf[4], *p = lenbuf;
+    long len = membuflength(m);
+
+    WRITELONG(p,len);
+    fwrite(lenbuf,4,1,ofile);
+    membufdump(m,ofile);
+}
+
+/*
+ * Produce the output file and shut the driver down. The file consists
+ * of the six-character magic "RDOFF1" followed by three length-prefixed
+ * chunks: the header records, the code segment and the data segment.
+ * If any BSS space was reserved, a BSS record is added to the header
+ * first. The buffers are then freed and the output file is closed.
+ */
+static void rdf_cleanup (void) {
+    struct BSSRec bs;
+
+    /* should write imported & exported symbol declarations to header here */
+
+    /* generate the output file... */
+    fwrite("RDOFF1",6,1,ofile);        /* file type magic number */
+
+    if (bsslength != 0) {              /* reserve BSS */
+        bs.type = 5;
+        bs.amount = bsslength;
+        write_bss_rec(&bs);
+    }
+
+    rdf_dump_chunk(header);            /* header length, then header */
+    rdf_dump_chunk(seg[0]);            /* code segment */
+    rdf_dump_chunk(seg[1]);            /* data segment */
+
+    freemembuf(header);
+    freemembuf(seg[0]);
+    freemembuf(seg[1]);
+    fclose(ofile);
+}
+
+static long rdf_segbase (long segment) { /* segment-base callback of the rdf
+ * driver: ignores `segment' and always returns 0 */
+ return 0;
+}
+
+static int rdf_directive (char *directive, char *value, int pass) { /* directive
+ * callback of the rdf driver: ignores its arguments and
+ * always returns 0 (presumably meaning that the directive
+ * was not recognised -- confirm against the caller) */
+ return 0;
+}
+
+static void rdf_filename (char *inname, char *outname, efunc error) { /* file-name
+ * callback of the rdf driver: delegates to
+ * standard_extension() with the extension ".rdf" */
+ standard_extension(inname,outname,".rdf",error);
+}
+
+struct ofmt of_rdf = { /* driver table for the "rdf" output format: two
+ * strings followed by this file's rdf_* entry points,
+ * given positionally in the field order of struct
+ * ofmt. That structure is declared outside this
+ * file (presumably nasm.h -- confirm there before
+ * reordering anything here). */
+ "Relocatable Dynamic Object File Format v1.1",
+ "rdf",
+ rdf_init,
+ rdf_out,
+ rdf_deflabel,
+ rdf_section_names,
+ rdf_segbase,
+ rdf_directive,
+ rdf_filename,
+ rdf_cleanup
+};
+
+#endif /* OF_RDF */
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..14c7a5b
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,1306 @@
+/* parser.c source line parser for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * initial version 27/iii/95 by Simon Tatham
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "parser.h"
+#include "float.h"
+
+#include "names.c"
+
+
+static long reg_flags[] = { /* sizes and special flags, one entry per
+ * register. The table is indexed by the register
+ * number that nexttoken() obtains by looking the
+ * name up in reg_names, so the two tables must be
+ * kept in the same order. */
+ 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
+ REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
+ REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
+ REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
+ REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
+ MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
+ REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
+ FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
+ REG_TREG
+};
+
+enum { /* special tokens: each value is the index of the matching entry
+ * in special_names[], which is what nexttoken() stores in
+ * tokval.t_integer for a TOKEN_SPECIAL */
+ S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
+ S_TWORD, S_WORD
+};
+
+static char *special_names[] = { /* and the actual text. Searched with bsi(),
+ * a binary search, so the entries must stay in
+ * strcmp order and in step with the enum above. */
+ "byte", "dword", "far", "long", "near", "qword", "short", "to",
+ "tword", "word"
+};
+
+static char *prefix_names[] = { /* instruction prefixes. Searched with bsi(),
+ * a binary search, so the entries must stay in
+ * strcmp order. nexttoken() adds PREFIX_ENUM_START
+ * to the index found to form the prefix code. */
+ "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
+ "repnz", "repz", "times"
+};
+
+/*
+ * Evaluator datatype. Expressions, within the evaluator, are
+ * stored as an array of these beasts, terminated by a record with
+ * type==0. Mostly, it's a vector type: each type denotes some kind
+ * of a component, and the value denotes the multiple of that
+ * component present in the expression. The exception is the WRT
+ * type, whose `value' field denotes the segment to which the
+ * expression is relative. These segments will be segment-base
+ * types, i.e. either odd segment values or SEG_ABS types. So it is
+ * still valid to assume that anything with a `value' field of zero
+ * is insignificant.
+ */
+typedef struct {
+ long type; /* a register, or EXPR_xxx; 0 marks the end of the array */
+ long value; /* must be >= 32 bits */
+} expr;
+
+static void eval_reset(void);
+static expr *evaluate(int);
+
+/*
+ * ASSUMPTION MADE HERE. The number of distinct register names
+ * (i.e. possible "type" fields for an expr structure) does not
+ * exceed 126.
+ */
+#define EXPR_SIMPLE 126
+#define EXPR_WRT 127
+#define EXPR_SEGBASE 128
+
+static int is_reloc(expr *);
+static int is_simple(expr *);
+static int is_really_simple (expr *);
+static long reloc_value(expr *);
+static long reloc_seg(expr *);
+static long reloc_wrt(expr *);
+
+enum { /* token types, other than chars. Numbering starts at 256 because
+ * nexttoken() returns an ordinary character as its unsigned char
+ * value, and returns 0 at the end of the line. */
+ TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
+ TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
+ TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
+ TOKEN_FLOAT
+};
+
+struct tokenval { /* value attached to the token last returned by
+ * nexttoken(); which fields are meaningful depends
+ * on the token type */
+ long t_integer, t_inttwo; /* t_integer: numeric value, or table index of a
+ * register, instruction, prefix or special word;
+ * t_inttwo: condition index of a conditional
+ * instruction, or length of a quoted string */
+ char *t_charptr; /* text of an identifier or float constant, or
+ * start of a quoted string; NULL for a plain
+ * number */
+};
+
+static char tempstorage[1024], *q;
+static int bsi (char *string, char **array, int size);/* binary search */
+
+static int nexttoken (void);
+static int is_comma_next (void);
+
+static char *bufptr;
+static int i;
+static struct tokenval tokval;
+static lfunc labelfunc;
+static efunc error;
+static char *label;
+static struct ofmt *outfmt;
+
+static long seg, ofs;
+
+/*
+ * Parse one source line, held in `buffer', into the instruction
+ * structure `result', which is also the return value.
+ *
+ *   segment, offset  saved in the file-level `seg' and `ofs'
+ *   lookup_label     saved in `labelfunc'
+ *   pass             assembly pass number; decides the `critical' level
+ *                    handed to evaluate()
+ *   output           output format driver, saved in `outfmt'
+ *   errfunc          error reporter, saved in `error'
+ *
+ * On return result->label is the label text or NULL, and
+ * result->opcode is -1 if the line holds no instruction or could not be
+ * parsed. For DB/DW/DD/DQ/DT the operands are returned as the list
+ * result->eops, which the caller releases with cleanup_insn(); for all
+ * other instructions up to three operands are described in
+ * result->oprs[].
+ *
+ * Token text and floating-point images are carved out of the static
+ * `tempstorage' area, which is reused from its start on every call.
+ */
+insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
+                  char *buffer, insn *result, struct ofmt *output,
+                  efunc errfunc) {
+    int operand;
+    int critical;
+
+    q = tempstorage;
+    bufptr = buffer;
+    labelfunc = lookup_label;
+    outfmt = output;
+    error = errfunc;
+    seg = segment;
+    ofs = offset;
+    label = "";
+
+    i = nexttoken();
+
+    result->eops = NULL;               /* must do this, whatever happens */
+
+    if (i==0) {                        /* blank line - ignore */
+        result->label = NULL;          /* so, no label on it */
+        result->opcode = -1;           /* and no instruction either */
+        return result;
+    }
+    if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
+        (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
+        error (ERR_NONFATAL, "label or instruction expected"
+               " at start of line");
+        result->label = NULL;
+        result->opcode = -1;
+        return result;
+    }
+
+    if (i == TOKEN_ID) {               /* there's a label here */
+        label = result->label = tokval.t_charptr;
+        i = nexttoken();
+        if (i == ':') {                /* skip over the optional colon */
+            i = nexttoken();
+        }
+    } else                             /* no label; so, moving swiftly on */
+        result->label = NULL;
+
+    if (i==0) {
+        result->opcode = -1;           /* this line contains just a label */
+        return result;
+    }
+
+    result->nprefix = 0;
+    result->times = 1;
+
+    while (i == TOKEN_PREFIX ||
+           (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
+        /*
+         * Handle special case: the TIMES prefix.
+         */
+        if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
+            expr *value;
+
+            i = nexttoken();
+            eval_reset();
+            value = evaluate (pass);
+            if (!value) {              /* but, error in evaluator */
+                result->opcode = -1;   /* unrecoverable parse error: */
+                return result;         /* ignore this instruction */
+            }
+            if (!is_simple (value)) {
+                error (ERR_NONFATAL,
+                       "non-constant argument supplied to TIMES");
+                result->times = 1;
+            } else
+                result->times = value->value;
+        } else {
+            if (result->nprefix == MAXPREFIX)
+                error (ERR_NONFATAL,
+                       "instruction has more than %d prefixes", MAXPREFIX);
+            else
+                result->prefixes[result->nprefix++] = tokval.t_integer;
+            i = nexttoken();
+        }
+    }
+
+    if (i != TOKEN_INSN) {
+        error (ERR_NONFATAL, "parser: instruction expected");
+        result->opcode = -1;
+        return result;
+    }
+
+    result->opcode = tokval.t_integer;
+    result->condition = tokval.t_inttwo;
+
+    /*
+     * RESB, RESW and RESD cannot be satisfied with incorrectly
+     * evaluated operands, since the correct values _must_ be known
+     * on the first pass. Hence, even in pass one, we set the
+     * `critical' flag on calling evaluate(), so that it will bomb
+     * out on undefined symbols. Nasty, but there's nothing we can
+     * do about it.
+     *
+     * For the moment, EQU has the same difficulty, so we'll
+     * include that.
+     */
+    if (result->opcode == I_RESB ||
+        result->opcode == I_RESW ||
+        result->opcode == I_RESD ||
+        result->opcode == I_RESQ ||
+        result->opcode == I_REST ||
+        result->opcode == I_EQU)
+        critical = pass;
+    else
+        critical = (pass==2 ? 2 : 0);
+
+    if (result->opcode == I_DB ||
+        result->opcode == I_DW ||
+        result->opcode == I_DD ||
+        result->opcode == I_DQ ||
+        result->opcode == I_DT) {
+        extop *eop, **tail = &result->eops;
+        int oper_num = 0;
+
+        /*
+         * Begin to read the DB/DW/DD/DQ/DT operands.
+         */
+        while (1) {
+            i = nexttoken();
+            if (i == 0)
+                break;
+            eop = *tail = nasm_malloc(sizeof(extop));
+            tail = &eop->next;
+            eop->next = NULL;
+            eop->type = EOT_NOTHING;
+            oper_num++;
+
+            if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
+                eop->type = EOT_DB_STRING;
+                eop->stringval = tokval.t_charptr;
+                eop->stringlen = tokval.t_inttwo;
+                i = nexttoken();       /* eat the comma */
+                continue;
+            }
+
+            if (i == TOKEN_FLOAT || i == '-') {
+                long sign = +1L;
+
+                if (i == '-') {
+                    char *save = bufptr;
+                    i = nexttoken();
+                    sign = -1L;
+                    if (i != TOKEN_FLOAT) {
+                        /* not a negative float after all: put the token
+                         * back and let the evaluator see the minus */
+                        bufptr = save;
+                        i = '-';
+                    }
+                }
+
+                if (i == TOKEN_FLOAT) {
+                    eop->type = EOT_DB_STRING;
+                    eop->stringval = q;
+                    if (result->opcode == I_DD)
+                        eop->stringlen = 4;
+                    else if (result->opcode == I_DQ)
+                        eop->stringlen = 8;
+                    else if (result->opcode == I_DT)
+                        eop->stringlen = 10;
+                    else {
+                        error(ERR_NONFATAL, "floating-point constant"
+                              " encountered in `D%c' instruction",
+                              result->opcode == I_DW ? 'W' : 'B');
+                        eop->type = EOT_NOTHING;
+                        eop->stringlen = 0;
+                    }
+                    /* Only claim storage and convert when a size was
+                     * actually chosen; in the error case there is no
+                     * meaningful length to advance `q' by or to hand to
+                     * float_const(). */
+                    if (eop->type != EOT_NOTHING) {
+                        q += eop->stringlen;
+                        if (!float_const (tokval.t_charptr, sign,
+                                          (unsigned char *)eop->stringval,
+                                          eop->stringlen, error))
+                            eop->type = EOT_NOTHING;
+                    }
+                    i = nexttoken();   /* eat the comma */
+                    continue;
+                }
+            }
+
+            /* anything else */ {
+                expr *value;
+                eval_reset();
+                value = evaluate (critical);
+                if (!value) {          /* but, error in evaluator */
+                    result->opcode = -1;/* unrecoverable parse error: */
+                    return result;     /* ignore this instruction */
+                }
+                if (is_reloc(value)) {
+                    eop->type = EOT_DB_NUMBER;
+                    eop->offset = reloc_value(value);
+                    eop->segment = reloc_seg(value);
+                    eop->wrt = reloc_wrt(value);
+                } else {
+                    error (ERR_NONFATAL,
+                           "`%s' operand %d: expression is not simple"
+                           " or relocatable",
+                           insn_names[result->opcode], oper_num);
+                }
+            }
+        }
+        return result;
+    }
+
+    /* right. Now we begin to parse the operands. There may be up to three
+     * of these, separated by commas, and terminated by a zero token. */
+
+    for (operand = 0; operand < 3; operand++) {
+        expr *seg, *value;             /* used most of the time */
+        int mref;                      /* is this going to be a memory ref? */
+
+        result->oprs[operand].addr_size = 0;/* have to zero this whatever */
+        i = nexttoken();
+        if (i == 0) break;             /* end of operands: get out of here */
+        result->oprs[operand].type = 0;    /* so far, no override */
+        while (i == TOKEN_SPECIAL) {   /* size specifiers */
+            switch ((int)tokval.t_integer) {
+              case S_BYTE:
+                result->oprs[operand].type |= BITS8;
+                break;
+              case S_WORD:
+                result->oprs[operand].type |= BITS16;
+                break;
+              case S_DWORD:
+              case S_LONG:
+                result->oprs[operand].type |= BITS32;
+                break;
+              case S_QWORD:
+                result->oprs[operand].type |= BITS64;
+                break;
+              case S_TWORD:
+                result->oprs[operand].type |= BITS80;
+                break;
+              case S_TO:
+                result->oprs[operand].type |= TO;
+                break;
+              case S_FAR:
+                result->oprs[operand].type |= FAR;
+                break;
+              case S_NEAR:
+                result->oprs[operand].type |= NEAR;
+                break;
+              case S_SHORT:
+                result->oprs[operand].type |= SHORT;
+                break;
+            }
+            i = nexttoken();
+        }
+
+        if (i == '[') {                /* memory reference */
+            i = nexttoken();
+            mref = TRUE;
+            if (i == TOKEN_SPECIAL) {  /* check for address size override */
+                switch ((int)tokval.t_integer) {
+                  case S_WORD:
+                    result->oprs[operand].addr_size = 16;
+                    break;
+                  case S_DWORD:
+                  case S_LONG:
+                    result->oprs[operand].addr_size = 32;
+                    break;
+                  default:
+                    error (ERR_NONFATAL, "invalid size specification in"
+                           " effective address");
+                }
+                i = nexttoken();
+            }
+        } else                         /* immediate operand, or register */
+            mref = FALSE;
+
+        eval_reset();
+
+        value = evaluate (critical);
+        if (!value) {                  /* error in evaluator */
+            result->opcode = -1;       /* unrecoverable parse error: */
+            return result;             /* ignore this instruction */
+        }
+        if (i == ':' && mref) {        /* it was seg:offset */
+            seg = value;               /* so shift this into the segment */
+            i = nexttoken();           /* then skip the colon */
+            if (i == TOKEN_SPECIAL) {  /* another check for size override */
+                switch ((int)tokval.t_integer) {
+                  case S_WORD:
+                    result->oprs[operand].addr_size = 16;
+                    break;
+                  case S_DWORD:
+                  case S_LONG:
+                    result->oprs[operand].addr_size = 32;
+                    break;
+                  default:
+                    error (ERR_NONFATAL, "invalid size specification in"
+                           " effective address");
+                }
+                i = nexttoken();
+            }
+            value = evaluate (critical);
+            /* and get the offset */
+            if (!value) {              /* but, error in evaluator */
+                result->opcode = -1;   /* unrecoverable parse error: */
+                return result;         /* ignore this instruction */
+            }
+        } else seg = NULL;
+        if (mref) {                    /* find ] at the end */
+            if (i != ']') {
+                error (ERR_NONFATAL, "parser: expecting ]");
+                do {                   /* error recovery again */
+                    i = nexttoken();
+                } while (i != 0 && i != ',');
+            } else                     /* we got the required ] */
+                i = nexttoken();
+        } else {                       /* immediate operand */
+            if (i != 0 && i != ',' && i != ':') {
+                error (ERR_NONFATAL, "comma or end of line expected");
+                do {                   /* error recovery */
+                    i = nexttoken();
+                } while (i != 0 && i != ',');
+            } else if (i == ':') {
+                result->oprs[operand].type |= COLON;
+            }
+        }
+
+        /* now convert the exprs returned from evaluate() into operand
+         * descriptions... */
+
+        if (mref) {                    /* it's a memory reference */
+            expr *e = value;
+            int b, i, s;               /* basereg, indexreg, scale */
+            long o;                    /* offset */
+
+            if (seg) {                 /* segment override */
+                if (seg[1].type!=0 || seg->value!=1 ||
+                    REG_SREG & ~reg_flags[seg->type])
+                    error (ERR_NONFATAL, "invalid segment override");
+                else if (result->nprefix == MAXPREFIX)
+                    error (ERR_NONFATAL,
+                           "instruction has more than %d prefixes",
+                           MAXPREFIX);
+                else
+                    result->prefixes[result->nprefix++] = seg->type;
+            }
+
+            b = i = -1, o = s = 0;
+
+            if (e->type < EXPR_SIMPLE) {   /* this bit's a register */
+                if (e->value == 1)     /* in fact it can be basereg */
+                    b = e->type;
+                else                   /* no, it has to be indexreg */
+                    i = e->type, s = e->value;
+                e++;
+            }
+            if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
+                if (e->value != 1) {   /* it has to be indexreg */
+                    if (i != -1) {     /* but it can't be */
+                        error(ERR_NONFATAL, "invalid effective address");
+                        result->opcode = -1;
+                        return result;
+                    } else
+                        i = e->type, s = e->value;
+                } else {               /* it can be basereg */
+                    if (b != -1)       /* or can it? */
+                        i = e->type, s = 1;
+                    else
+                        b = e->type;
+                }
+                e++;
+            }
+            if (e->type != 0) {        /* is there an offset? */
+                if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
+                    error (ERR_NONFATAL, "invalid effective address");
+                    result->opcode = -1;
+                    return result;
+                } else {
+                    if (e->type == EXPR_SIMPLE) {
+                        o = e->value;
+                        e++;
+                    }
+                    if (e->type == EXPR_WRT) {
+                        result->oprs[operand].wrt = e->value;
+                        e++;
+                    } else
+                        result->oprs[operand].wrt = NO_SEG;
+                    if (e->type != 0) {    /* is there a segment id? */
+                        if (e->type < EXPR_SEGBASE) {
+                            error (ERR_NONFATAL,
+                                   "invalid effective address");
+                            result->opcode = -1;
+                            return result;
+                        } else
+                            result->oprs[operand].segment = (e->type -
+                                                             EXPR_SEGBASE);
+                        e++;
+                    } else
+                        result->oprs[operand].segment = NO_SEG;
+                }
+            } else {
+                o = 0;
+                result->oprs[operand].wrt = NO_SEG;
+                result->oprs[operand].segment = NO_SEG;
+            }
+
+            if (e->type != 0) {        /* there'd better be nothing left! */
+                error (ERR_NONFATAL, "invalid effective address");
+                result->opcode = -1;
+                return result;
+            }
+
+            result->oprs[operand].type |= MEMORY;
+            if (b==-1 && (i==-1 || s==0))
+                result->oprs[operand].type |= MEM_OFFS;
+            result->oprs[operand].basereg = b;
+            result->oprs[operand].indexreg = i;
+            result->oprs[operand].scale = s;
+            result->oprs[operand].offset = o;
+        } else {                       /* it's not a memory reference */
+            if (is_reloc(value)) {     /* it's immediate */
+                result->oprs[operand].type |= IMMEDIATE;
+                result->oprs[operand].offset = reloc_value(value);
+                result->oprs[operand].segment = reloc_seg(value);
+                result->oprs[operand].wrt = reloc_wrt(value);
+                if (is_simple(value) && reloc_value(value)==1)
+                    result->oprs[operand].type |= UNITY;
+            } else {                   /* it's a register */
+                if (value->type>=EXPR_SIMPLE || value->value!=1) {
+                    error (ERR_NONFATAL, "invalid operand type");
+                    result->opcode = -1;
+                    return result;
+                }
+                /* clear overrides, except TO which applies to FPU regs */
+                result->oprs[operand].type &= TO;
+                result->oprs[operand].type |= REGISTER;
+                result->oprs[operand].type |= reg_flags[value->type];
+                result->oprs[operand].basereg = value->type;
+            }
+        }
+    }
+
+    result->operands = operand;        /* set operand count */
+
+    while (operand<3)                  /* clear remaining operands */
+        result->oprs[operand++].type = 0;
+
+    /*
+     * Transform RESW, RESD, RESQ, REST into RESB.
+     */
+    switch (result->opcode) {
+      case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
+      case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
+      case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
+      case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
+    }
+
+    return result;
+}
+
+/*
+ * Look ahead from `bufptr' without consuming anything. Returns nonzero
+ * if, after optional white space, the next character is a comma, the
+ * start of a comment (`;') or the end of the line.
+ */
+static int is_comma_next (void) {
+    char *p = bufptr;
+
+    /* the cast keeps bytes above 0x7F inside the domain of isspace() on
+     * platforms where plain char is signed */
+    while (isspace((unsigned char) *p))
+        p++;
+    return (*p == ',' || *p == ';' || !*p);
+}
+
+/* isidstart matches any character that may start an identifier, and isidchar
+ * matches any character that may appear at places other than the start of an
+ * identifier. isidchar accepts everything isidstart does, so e.g. a period
+ * (as used by local labels) is allowed in either position, whereas a number
+ * may appear anywhere *but* at the start. */
+
+#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' )
+#define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \
+ || (c)=='@' || (c)=='~' )
+
+/* Ditto for numeric constants. */
+
+#define isnumstart(c) ( isdigit(c) || (c)=='$' )
+#define isnumchar(c) ( isalnum(c) )
+
+/* This returns the numeric value of a given 'digit'. */
+
+#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
+
+/*
+ * Tokeniser: skip white space at `bufptr', read one token and return
+ * its type. The return value is 0 at the end of the line or at a
+ * comment, one of the TOKEN_xxx values, or the character itself for
+ * anything else. Details of the token are left in `tokval'.
+ *
+ * Besides advancing `bufptr' and filling `tokval', the routine uses up
+ * space in `tempstorage' through `q' for the text of identifiers and
+ * numbers. Lookahead can still be undone by saving and restoring
+ * `bufptr' alone; the storage consumed is then simply left unused.
+ *
+ * NOTE(review): nothing here checks `q' against the end of
+ * `tempstorage'; presumably the caller bounds the line length --
+ * confirm.
+ */
+static int nexttoken (void) {
+    char ourcopy[256], *r, *s;
+
+    while (isspace((unsigned char) *bufptr)) bufptr++;
+    if (!*bufptr) return 0;
+
+    /* we have a token; either an id, a number or a char */
+    if (isidstart(*bufptr) ||
+        (*bufptr == '$' && isidstart(bufptr[1]))) {
+        /* now we've got an identifier */
+        int i;
+        int is_sym = FALSE;
+
+        if (*bufptr == '$') {
+            is_sym = TRUE;
+            bufptr++;
+        }
+
+        tokval.t_charptr = q;
+        *q++ = *bufptr++;
+        while (isidchar(*bufptr)) *q++ = *bufptr++;
+        *q++ = '\0';
+        /*
+         * Make a lower-case copy for the keyword lookups below. The
+         * copy is cut off at the size of `ourcopy'; that loses nothing,
+         * since a name that long cannot match any of the keyword
+         * tables, and the full text stays available via t_charptr.
+         */
+        for (s=tokval.t_charptr, r=ourcopy;
+             *s && r < ourcopy + sizeof(ourcopy) - 1; s++)
+            *r++ = tolower ((unsigned char) *s);
+        *r = '\0';
+        if (is_sym)
+            return TOKEN_ID;           /* bypass all other checks */
+        /* right, so we have an identifier sitting in temp storage. now,
+         * is it actually a register or instruction name, or what? */
+        if ((tokval.t_integer=bsi(ourcopy, reg_names,
+                                  elements(reg_names)))>=0)
+            return TOKEN_REG;
+        if ((tokval.t_integer=bsi(ourcopy, insn_names,
+                                  elements(insn_names)))>=0)
+            return TOKEN_INSN;
+        /* conditional instructions: a stem from icn[] followed by a
+         * condition code from conditions[] */
+        for (i=0; i<elements(icn); i++)
+            if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
+                char *p = ourcopy + strlen(icn[i]);
+                tokval.t_integer = ico[i];
+                if ((tokval.t_inttwo=bsi(p, conditions,
+                                         elements(conditions)))>=0)
+                    return TOKEN_INSN;
+            }
+        if ((tokval.t_integer=bsi(ourcopy, prefix_names,
+                                  elements(prefix_names)))>=0) {
+            tokval.t_integer += PREFIX_ENUM_START;
+            return TOKEN_PREFIX;
+        }
+        if ((tokval.t_integer=bsi(ourcopy, special_names,
+                                  elements(special_names)))>=0)
+            return TOKEN_SPECIAL;
+        if (!strcmp(ourcopy, "seg"))
+            return TOKEN_SEG;
+        if (!strcmp(ourcopy, "wrt"))
+            return TOKEN_WRT;
+        return TOKEN_ID;
+    } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
+        /*
+         * It's a $ sign with no following hex number; this must
+         * mean it's a Here token ($), evaluating to the current
+         * assembly location, or a Base token ($$), evaluating to
+         * the base of the current segment.
+         */
+        bufptr++;
+        if (*bufptr == '$') {
+            bufptr++;
+            return TOKEN_BASE;
+        }
+        return TOKEN_HERE;
+    } else if (isnumstart(*bufptr)) {  /* now we've got a number */
+        char *r = q;
+        int rn_error;
+
+        *q++ = *bufptr++;
+        while (isnumchar(*bufptr)) {
+            *q++ = *bufptr++;
+        }
+        if (*bufptr == '.') {
+            /*
+             * a floating point constant
+             */
+            *q++ = *bufptr++;
+            while (isnumchar(*bufptr)) {
+                *q++ = *bufptr++;
+            }
+            *q++ = '\0';
+            tokval.t_charptr = r;
+            return TOKEN_FLOAT;
+        }
+        *q++ = '\0';
+        tokval.t_integer = readnum(r, &rn_error);
+        if (rn_error)
+            return TOKEN_ERRNUM;       /* some malformation occurred */
+        tokval.t_charptr = NULL;
+        return TOKEN_NUM;
+    } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
+        char quote = *bufptr++, *r;
+        r = tokval.t_charptr = bufptr;
+        while (*bufptr && *bufptr != quote) bufptr++;
+        tokval.t_inttwo = bufptr - r;  /* store full version */
+        if (!*bufptr)
+            return TOKEN_ERRNUM;       /* unmatched quotes */
+        tokval.t_integer = 0;
+        r = bufptr++;                  /* skip over final quote */
+        /* walk back from the closing quote to the opening one, so that
+         * the first character ends up in the least significant byte */
+        while (quote != *--r) {
+            tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
+        }
+        return TOKEN_NUM;
+    } else if (*bufptr == ';') {       /* a comment has happened - stay */
+        return 0;
+    } else if ((*bufptr == '>' || *bufptr == '<' ||
+                *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
+        /* one of the doubled operators >> << // %% */
+        bufptr += 2;
+        return (bufptr[-2] == '>' ? TOKEN_SHR :
+                bufptr[-2] == '<' ? TOKEN_SHL :
+                bufptr[-2] == '/' ? TOKEN_SDIV :
+                TOKEN_SMOD);
+    } else                             /* just an ordinary char */
+        return (unsigned char) (*bufptr++);
+}
+
+/*
+ * Binary search for "string" among the first "size" entries of
+ * "array". Returns the index of the matching entry, or -1 if there
+ * is none. The table has to be in strcmp() order for the search to
+ * find anything reliably.
+ */
+static int bsi (char *string, char **array, int size) {
+    int lo = -1;                /* everything at or below lo sorts before string */
+    int hi = size;              /* everything at or above hi sorts after string */
+
+    while (hi - lo >= 2) {
+        int mid = (lo + hi) / 2;
+        int cmp = strcmp(string, array[mid]);
+
+        if (cmp == 0)
+            return mid;         /* exact match */
+        if (cmp < 0)
+            hi = mid;           /* keep looking in the lower half */
+        else
+            lo = mid;           /* keep looking in the upper half */
+    }
+    return -1;                  /* the window closed without a match */
+}
+
+/*
+ * Release every extop hanging off the instruction's `eops' list.
+ * On return i->eops is NULL.
+ */
+void cleanup_insn (insn *i) {
+    extop *victim, *rest;
+
+    for (victim = i->eops; victim; victim = rest) {
+        rest = victim->next;    /* read the link before the node is freed */
+        i->eops = rest;
+        nasm_free (victim);
+    }
+}
+
+/* ------------- Evaluator begins here ------------------ */
+
+/*
+ * Scratch pool from which the evaluator carves its expression
+ * vectors. tempptr is the next free slot; eval_reset() rewinds it to
+ * the start of the array. NOTE(review): none of the evaluator
+ * routines visible here check tempptr against the end of the array.
+ */
+static expr exprtempstorage[1024], *tempptr; /* store exprs in here */
+
+/*
+ * Sum two expression vectors, building the result in the temporary
+ * pool and returning a pointer to it. Terms of equal type have their
+ * values added; other terms are copied across, lower type first (the
+ * merge assumes both inputs are already in ascending type order).
+ *
+ * Far-absolute segment terms (type >= EXPR_SEGBASE+SEG_ABS) are odd:
+ * they survive into the result _only_ when at least one operand is a
+ * truly pure scalar.
+ */
+static expr *add_vectors(expr *p, expr *q) {
+    expr *sum = tempptr;
+    int keep_abs = is_really_simple(p) || is_really_simple(q);
+
+    /* Merge while both lists still hold ordinary terms. */
+    for (;;) {
+        if (!p->type || !q->type)
+            break;
+        if (p->type >= EXPR_SEGBASE+SEG_ABS ||
+            q->type >= EXPR_SEGBASE+SEG_ABS)
+            break;
+
+        if (p->type == q->type) {       /* same kind of term: add */
+            tempptr->type = p->type;
+            tempptr->value = p->value + q->value;
+            p++, q++;
+        } else if (p->type < q->type) { /* p's term comes first */
+            tempptr->type = p->type;
+            tempptr->value = p->value;
+            p++;
+        } else {                        /* q's term comes first */
+            tempptr->type = q->type;
+            tempptr->value = q->value;
+            q++;
+        }
+        tempptr++;
+    }
+
+    /* Whatever is left of p, then of q, subject to the far-abs rule. */
+    for (; p->type && (keep_abs || p->type < EXPR_SEGBASE+SEG_ABS);
+         p++, tempptr++) {
+        tempptr->type = p->type;
+        tempptr->value = p->value;
+    }
+    for (; q->type && (keep_abs || q->type < EXPR_SEGBASE+SEG_ABS);
+         q++, tempptr++) {
+        tempptr->type = q->type;
+        tempptr->value = q->value;
+    }
+
+    tempptr->type = 0;                  /* terminator */
+    tempptr++;
+
+    return sum;
+}
+
+/*
+ * Scale a vector in place by `scalar' and return it. The walk stops
+ * at the terminator or at the first far-absolute segment term, and a
+ * terminator is written there, so any far-absolute part is cut off.
+ */
+static expr *scalar_mult(expr *vect, long scalar) {
+    expr *term;
+
+    for (term = vect;
+         term->type && term->type < EXPR_SEGBASE+SEG_ABS;
+         term++)
+        term->value = scalar * term->value;
+
+    term->type = 0;             /* truncate here */
+    return vect;
+}
+
+/*
+ * Build a one-term vector holding the plain number `scalar' in the
+ * temporary pool (one EXPR_SIMPLE term plus the terminator) and
+ * return it.
+ */
+static expr *scalarvect (long scalar) {
+    expr *vec = tempptr;
+
+    vec[0].type = EXPR_SIMPLE;
+    vec[0].value = scalar;
+    vec[1].type = 0;
+    tempptr = vec + 2;
+
+    return vec;
+}
+
+/*
+ * TRUE when the vector is nothing more than a plain number. Terms
+ * whose value is zero are ignored, and a trailing far-absolute
+ * segment term is tolerated.
+ */
+static int is_simple (expr *vect) {
+    expr *t = vect;
+
+    while (t->type && t->value == 0)    /* skip leading zero terms */
+        t++;
+    if (t->type == 0)
+        return 1;                       /* nothing but zeros */
+    if (t->type != EXPR_SIMPLE)
+        return 0;
+
+    for (t++; t->type && t->value == 0; t++)
+        ;                               /* skip zero terms after the number */
+
+    /* anything left must be a far-absolute segment term */
+    return !(t->type && t->type < EXPR_SEGBASE+SEG_ABS);
+}
+
+/*
+ * TRUE when the vector is a plain number and nothing else at all:
+ * unlike is_simple(), a far-absolute segment term disqualifies it.
+ * Zero-valued terms are ignored.
+ */
+static int is_really_simple (expr *vect) {
+    expr *t = vect;
+
+    while (t->type && t->value == 0)    /* skip leading zero terms */
+        t++;
+    if (t->type == 0)
+        return 1;
+    if (t->type != EXPR_SIMPLE)
+        return 0;
+
+    for (t++; t->type && t->value == 0; t++)
+        ;                               /* skip zero terms after the number */
+
+    return !t->type;                    /* must have reached the terminator */
+}
+
+/*
+ * Return TRUE if the argument is relocatable (i.e. a simple
+ * scalar, plus at most one segment-base, plus possibly a WRT).
+ *
+ * Zero-valued terms are ignored throughout. A vector whose first
+ * live term has a type below EXPR_SIMPLE is rejected. After the
+ * optional scalar term, every remaining live term that is not a WRT
+ * is counted as a segment base, and a second one makes the vector
+ * non-relocatable. (The previous code fell through to `return 1'
+ * whatever was left, so it never rejected extra segment bases.)
+ */
+static int is_reloc (expr *vect) {
+    int segbases = 0;
+
+    while (vect->type && !vect->value)  /* skip leading zero terms */
+        vect++;
+    if (!vect->type)                    /* nothing but zeros */
+        return 1;
+    if (vect->type < EXPR_SIMPLE)       /* something below a plain number */
+        return 0;
+    if (vect->type == EXPR_SIMPLE) {    /* step over the scalar part */
+        do {
+            vect++;
+        } while (vect->type && !vect->value);
+        if (!vect->type)
+            return 1;
+    }
+
+    /*
+     * Count by term kind rather than by position, so the answer does
+     * not depend on whether WRT terms sort before or after segment
+     * bases.
+     */
+    for (; vect->type; vect++) {
+        if (vect->type == EXPR_WRT || !vect->value)
+            continue;
+        if (++segbases > 1)
+            return 0;                   /* more than one segment base */
+    }
+    return 1;
+}
+
+/*
+ * Fetch the plain-number part of a relocatable vector (pure scalars
+ * count as relocatable too). Zero-valued leading terms are skipped;
+ * if the first live term is not EXPR_SIMPLE the answer is 0.
+ */
+static long reloc_value (expr *vect) {
+    expr *t = vect;
+
+    while (t->type && !t->value)
+        t++;
+
+    if (t->type && t->type == EXPR_SIMPLE)
+        return t->value;
+    return 0;
+}
+
+/*
+ * Work out which segment a relocatable vector refers to. WRT terms
+ * and zero-valued terms are passed over, as is one EXPR_SIMPLE term;
+ * the type of the term found after that, less EXPR_SEGBASE, is the
+ * segment number. NO_SEG is returned when no such term exists.
+ */
+static long reloc_seg (expr *vect) {
+    expr *t = vect;
+
+    while (t->type && (t->type == EXPR_WRT || t->value == 0))
+        t++;
+
+    if (t->type == EXPR_SIMPLE) {
+        /* step over the scalar, then over more WRT/zero terms */
+        for (t++; t->type && (t->type == EXPR_WRT || t->value == 0); t++)
+            ;
+    }
+
+    if (t->type)
+        return t->type - EXPR_SEGBASE;
+    return NO_SEG;
+}
+
+/*
+ * Fetch the segment named by the WRT term of a relocatable vector:
+ * terms with a type below EXPR_WRT are skipped, and if the next term
+ * is the WRT one its value is returned. NO_SEG means no WRT part.
+ */
+static long reloc_wrt (expr *vect) {
+    expr *t;
+
+    for (t = vect; t->type; t++)
+        if (t->type >= EXPR_WRT)
+            break;
+
+    if (t->type != EXPR_WRT)
+        return NO_SEG;
+    return t->value;
+}
+
+/* Rewind the temporary expression pool to its first slot. */
+static void eval_reset(void) {
+    tempptr = &exprtempstorage[0];
+}
+
+/*
+ * The SEG operator: yield the segment part of a relocatable value.
+ * A far-absolute segment comes back as a plain number (the segment
+ * with SEG_ABS masked off); any other segment comes back as a single
+ * segment-base term for whatever the output format's segbase() hook
+ * returns for seg+1. Errors are reported here and signalled to the
+ * caller by a NULL return.
+ */
+static expr *segment_part (expr *e) {
+    long seg;
+    expr *f;
+
+    /* both failure modes carry the same diagnostic */
+    seg = is_reloc(e) ? reloc_seg(e) : NO_SEG;
+    if (seg == NO_SEG) {
+        error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
+        return NULL;
+    }
+
+    if (seg & SEG_ABS)
+        return scalarvect(seg & ~SEG_ABS);
+
+    f = tempptr;                /* one term plus the terminator */
+    tempptr += 2;
+    f[1].type = 0;
+    f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
+    f->value = 1;
+    return f;
+}
+
+/*
+ * Recursive-descent parser. Called with a single operand, `critical',
+ * which says how unresolved symbols are to be treated: 0 means the
+ * evaluation is not critical and an unresolved symbol is quietly
+ * given the current segment and offset; 1 reports it as "not defined
+ * before use"; 2 reports it as "undefined". Must update the global
+ * `i' to reflect the token after the parsed string. May return NULL.
+ *
+ * evaluate() should report its own errors: on return it is assumed
+ * that if NULL has been returned, the error has already been
+ * reported.
+ */
+
+/*
+ * Grammar parsed is:
+ *
+ * expr : expr0 [ WRT expr6 ]
+ * expr0 : expr1 [ {|} expr1]
+ * expr1 : expr2 [ {^} expr2]
+ * expr2 : expr3 [ {&} expr3]
+ * expr3 : expr4 [ {<<,>>} expr4...]
+ * expr4 : expr5 [ {+,-} expr5...]
+ * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
+ * expr6 : { ~,+,-,SEG } expr6
+ * | (expr0)
+ * | symbol
+ * | $
+ * | number
+ */
+
+/*
+ * One routine per precedence level of the grammar above. Forward
+ * declarations are needed because the levels call each other
+ * recursively: expr6 calls back into expr0 for a parenthesised
+ * sub-expression. Each takes the `critical' flag and passes it down.
+ */
+static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
+static expr *expr4(int), *expr5(int), *expr6(int);
+
+/*
+ * expr0: a chain of expr1 operands joined by `|' (bitwise OR).
+ * A non-scalar operand is reported, but evaluation carries on using
+ * the operands' scalar parts.
+ */
+static expr *expr0(int critical) {
+    expr *lhs = expr1(critical);
+
+    while (lhs && i == '|') {
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr1(critical);
+        if (!rhs)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "`|' operator may only be applied to"
+                  " scalar values");
+        lhs = scalarvect (reloc_value(lhs) | reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr1: a chain of expr2 operands joined by `^' (bitwise XOR).
+ * A non-scalar operand is reported, but evaluation carries on using
+ * the operands' scalar parts.
+ */
+static expr *expr1(int critical) {
+    expr *lhs = expr2(critical);
+
+    while (lhs && i == '^') {
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr2(critical);
+        if (!rhs)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "`^' operator may only be applied to"
+                  " scalar values");
+        lhs = scalarvect (reloc_value(lhs) ^ reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr2: a chain of expr3 operands joined by `&' (bitwise AND).
+ * A non-scalar operand is reported, but evaluation carries on using
+ * the operands' scalar parts.
+ */
+static expr *expr2(int critical) {
+    expr *lhs = expr3(critical);
+
+    while (lhs && i == '&') {
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr3(critical);
+        if (!rhs)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "`&' operator may only be applied to"
+                  " scalar values");
+        lhs = scalarvect (reloc_value(lhs) & reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr3: a chain of expr4 operands joined by the shift operators.
+ * The right shift is done on the unsigned value. A non-scalar
+ * operand is reported, but evaluation carries on using the operands'
+ * scalar parts.
+ */
+static expr *expr3(int critical) {
+    expr *lhs = expr4(critical);
+
+    while (lhs && (i == TOKEN_SHL || i == TOKEN_SHR)) {
+        int op = i;
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr4(critical);
+        if (!rhs)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "shift operator may only be applied to"
+                  " scalar values");
+
+        if (op == TOKEN_SHL)
+            lhs = scalarvect (reloc_value(lhs) << reloc_value(rhs));
+        else
+            lhs = scalarvect (((unsigned long)reloc_value(lhs)) >>
+                              reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr4: a chain of expr5 operands joined by `+' and `-'. These work
+ * on whole vectors; subtraction negates the right-hand vector in
+ * place and then adds.
+ */
+static expr *expr4(int critical) {
+    expr *lhs = expr5(critical);
+
+    while (lhs && (i == '+' || i == '-')) {
+        int op = i;
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr5(critical);
+        if (!rhs)
+            return NULL;
+
+        if (op == '-')
+            rhs = scalar_mult(rhs, -1L);
+        lhs = add_vectors (lhs, rhs);
+    }
+    return lhs;
+}
+
+/*
+ * expr5: a chain of expr6 operands joined by the multiplicative
+ * operators: `*', `/' and `%' (the latter two unsigned), and `//' and
+ * `%%' (TOKEN_SDIV and TOKEN_SMOD, signed).
+ *
+ * Multiplication needs at least one scalar operand and scales the
+ * other vector by it. Every other operator needs two scalars and a
+ * non-zero right-hand side. Errors are reported here and signalled by
+ * a NULL return.
+ */
+static expr *expr5(int critical) {
+    expr *e, *f;
+
+    e = expr6(critical);
+    if (!e)
+        return NULL;
+    /*
+     * The second test here used to be a repeat of `*', so `%' was
+     * never accepted and the `case '%'' below could not be reached.
+     */
+    while (i == '*' || i == '/' || i == '%' ||
+           i == TOKEN_SDIV || i == TOKEN_SMOD) {
+        int j = i;              /* remember the operator */
+        i = nexttoken();
+        f = expr6(critical);
+        if (!f)
+            return NULL;
+        if (j != '*' && (!is_simple(e) || !is_simple(f))) {
+            error(ERR_NONFATAL, "division operator may only be applied to"
+                  " scalar values");
+            return NULL;
+        }
+        if (j != '*' && reloc_value(f) == 0) {
+            error(ERR_NONFATAL, "division by zero");
+            return NULL;
+        }
+        switch (j) {
+          case '*':
+            if (is_simple(e))
+                e = scalar_mult (f, reloc_value(e));
+            else if (is_simple(f))
+                e = scalar_mult (e, reloc_value(f));
+            else {
+                error(ERR_NONFATAL, "unable to multiply two "
+                      "non-scalar objects");
+                return NULL;
+            }
+            break;
+          case '/':
+            e = scalarvect (((unsigned long)reloc_value(e)) /
+                            ((unsigned long)reloc_value(f)));
+            break;
+          case '%':
+            e = scalarvect (((unsigned long)reloc_value(e)) %
+                            ((unsigned long)reloc_value(f)));
+            break;
+          case TOKEN_SDIV:
+            e = scalarvect (((signed long)reloc_value(e)) /
+                            ((signed long)reloc_value(f)));
+            break;
+          case TOKEN_SMOD:
+            e = scalarvect (((signed long)reloc_value(e)) %
+                            ((signed long)reloc_value(f)));
+            break;
+        }
+    }
+    return e;
+}
+
+/*
+ * expr6: the innermost grammar level. Handles the unary prefixes
+ * (-, +, ~, SEG), a parenthesised expr0, and the primary terms:
+ * numbers, registers, identifiers, `$' (TOKEN_HERE) and `$$'
+ * (TOKEN_BASE). On success the global `i' holds the token after the
+ * term. Errors are reported here and signalled by a NULL return.
+ */
+static expr *expr6(int critical) {
+ expr *e;
+ long label_seg, label_ofs;
+
+ if (i == '-') {
+ i = nexttoken();
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ return scalar_mult (e, -1L);
+ } else if (i == '+') {
+ i = nexttoken();
+ return expr6(critical);
+ } else if (i == '~') {
+ i = nexttoken();
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ if (!is_simple(e)) {
+ error(ERR_NONFATAL, "`~' operator may only be applied to"
+ " scalar values");
+ return NULL;
+ }
+ return scalarvect(~reloc_value(e));
+ } else if (i == TOKEN_SEG) {
+ i = nexttoken();
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ return segment_part(e);
+ } else if (i == '(') {
+ i = nexttoken();
+ e = expr0(critical);
+ if (!e)
+ return NULL;
+ if (i != ')') {
+ error(ERR_NONFATAL, "expecting `)'");
+ return NULL;
+ }
+ i = nexttoken();
+ return e;
+ } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
+ i == TOKEN_HERE || i == TOKEN_BASE) {
+ /* the term is built in the next free slot of the temporary pool */
+ e = tempptr;
+ switch (i) {
+ case TOKEN_NUM:
+ e->type = EXPR_SIMPLE;
+ e->value = tokval.t_integer;
+ break;
+ case TOKEN_REG:
+ /* a register is a term whose type is the register number */
+ e->type = tokval.t_integer;
+ e->value = 1;
+ break;
+ case TOKEN_ID:
+ case TOKEN_HERE:
+ case TOKEN_BASE:
+ /*
+ * Since the whole line is parsed before the label it
+ * defines is given to the label manager, we have
+ * problems with lines such as
+ *
+ * end: TIMES 512-(end-start) DB 0
+ *
+ * where `end' is not known on pass one, despite not
+ * really being a forward reference, and due to
+ * criticality it is _needed_. Hence we check our label
+ * against the currently defined one, and do our own
+ * resolution of it if we have to.
+ */
+ /*
+ * NOTE(review): seg, ofs, label and labelfunc are file-level
+ * names set up outside this excerpt; they look like the
+ * current segment/offset, the label on this line and the
+ * label lookup callback - confirm against parse_line().
+ */
+ if (i == TOKEN_BASE) {
+ label_seg = seg;
+ label_ofs = 0;
+ } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
+ label_seg = seg;
+ label_ofs = ofs;
+ } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
+ /* lookup failed: what happens depends on `critical' */
+ if (critical == 2) {
+ error (ERR_NONFATAL, "symbol `%s' undefined",
+ tokval.t_charptr);
+ return NULL;
+ } else if (critical == 1) {
+ error (ERR_NONFATAL, "symbol `%s' not defined before use",
+ tokval.t_charptr);
+ return NULL;
+ } else {
+ /* not critical: stand in the current location */
+ label_seg = seg;
+ label_ofs = ofs;
+ }
+ }
+ e->type = EXPR_SIMPLE;
+ e->value = label_ofs;
+ if (label_seg!=NO_SEG) {
+ /* second term: the base of the label's segment */
+ tempptr++;
+ tempptr->type = EXPR_SEGBASE + label_seg;
+ tempptr->value = 1;
+ }
+ break;
+ }
+ /* step past the last term written, then add the terminator */
+ tempptr++;
+ tempptr->type = 0;
+ tempptr++;
+ i = nexttoken();
+ return e;
+ } else {
+ error(ERR_NONFATAL, "expression syntax error");
+ return NULL;
+ }
+}
+
+/*
+ * Top of the grammar: `expr0 [ WRT expr6 ]'. Any far-absolute
+ * segment part is stripped from the result. When a WRT clause is
+ * present, both sides must be relocatable and a WRT term is added to
+ * the result. Errors are reported here and signalled by a NULL
+ * return.
+ */
+static expr *evaluate (int critical) {
+    expr *lhs, *rhs, *wrt;
+
+    lhs = expr0 (critical);
+    if (!lhs)
+        return NULL;
+
+    if (i != TOKEN_WRT)
+        return scalar_mult (lhs, 1L);  /* strip far-absolute segment part */
+
+    if (!is_reloc(lhs)) {
+        error(ERR_NONFATAL, "invalid left-hand operand to WRT");
+        return NULL;
+    }
+    i = nexttoken();                   /* eat the WRT */
+    rhs = expr6 (critical);
+    if (!rhs)
+        return NULL;
+
+    lhs = scalar_mult (lhs, 1L);       /* strip far-absolute segment part */
+
+    wrt = tempptr;                     /* one WRT term plus terminator */
+    tempptr += 2;
+    wrt[1].type = 0;
+    wrt->type = EXPR_WRT;
+
+    if (!is_reloc(rhs)) {
+        error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+        return NULL;
+    }
+    wrt->value = reloc_seg(rhs);
+    if (wrt->value == NO_SEG)
+        wrt->value = reloc_value(rhs) | SEG_ABS;
+    else if (!(wrt->value & SEG_ABS) && !(wrt->value % 2) && critical) {
+        error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+        return NULL;
+    }
+
+    return add_vectors (lhs, wrt);
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..82d5235
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,18 @@
+/* parser.h header file for the parser module of version 0.1 of the
+ * Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_PARSER_H
+#define NASM_PARSER_H
+
+/*
+ * Parse one line of source text.
+ * NOTE(review): the implementation is not visible from this header;
+ * going by the parameter names, `buffer' is the text of the line,
+ * `segment'/`offset' the current assembly location, `lookup_label'
+ * the label resolver, `output' the object format driver and `error'
+ * the diagnostic callback, with the parsed instruction stored in
+ * `result' - confirm against parser.c.
+ */
+insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
+ char *buffer, insn *result, struct ofmt *output,
+ efunc error);
+/* Free the extended-operand (eops) list attached to `instruction'. */
+void cleanup_insn (insn *instruction);
+
+#endif
diff --git a/rdoff/Makefile b/rdoff/Makefile
new file mode 100644
index 0000000..2e55dde
--- /dev/null
+++ b/rdoff/Makefile
@@ -0,0 +1,43 @@
+# Makefile for RDOFF object file utils; part of the Netwide Assembler
+#
+# The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+# Julian Hall. All rights reserved. The software is
+# redistributable under the licence given in the file "Licence"
+# distributed in the NASM archive.
+#
+# This Makefile is designed for use under Unix (probably fairly
+# portably).
+
+# Tool and flag settings. -I.. lets the sources find the main NASM
+# headers in the parent directory.
+CC = gcc
+CCFLAGS = -c -O -g -Wall -ansi -pedantic -I..
+LINK = gcc
+LINKFLAGS = -o
+DLINKFLAGS = -o
+LIBRARIES =
+STRIP = strip
+# Objects linked into ldrdf and rdx respectively.
+LDRDFLIBS = rdoff.o ../nasmlib.o symtab.o collectn.o
+RDXLIBS = rdoff.o rdfload.o symtab.o collectn.o
+
+.c.o:
+	$(CC) $(CCFLAGS) $*.c
+
+all : rdfdump ldrdf rdx
+
+rdfdump : rdfdump.o
+	$(LINK) $(LINKFLAGS) rdfdump rdfdump.o
+ldrdf : ldrdf.o $(LDRDFLIBS)
+	$(LINK) $(LINKFLAGS) ldrdf ldrdf.o $(LDRDFLIBS)
+rdx : rdx.o $(RDXLIBS)
+	$(LINK) $(LINKFLAGS) rdx rdx.o $(RDXLIBS)
+
+# Header dependencies. ldrdf.c also includes nasm.h from the parent
+# directory, so it is listed here.
+rdfdump.o : rdfdump.c
+rdoff.o : rdoff.c rdoff.h
+ldrdf.o : ldrdf.c rdoff.h ../nasm.h ../nasmlib.h symtab.h collectn.h
+symtab.o : symtab.c symtab.h
+collectn.o : collectn.c collectn.h
+rdx.o : rdx.c rdoff.h rdfload.h symtab.h
+rdfload.o : rdfload.c rdfload.h rdoff.h collectn.h symtab.h
+
+# Recurse with $(MAKE) rather than a literal `make', so the sub-make
+# is the same program and inherits the command-line flags.
+clean :
+	rm -f *.o *~ rdfdump ldrdf rdx
+	$(MAKE) -C test clean
diff --git a/rdoff/collectn.c b/rdoff/collectn.c
new file mode 100644
index 0000000..c265c95
--- /dev/null
+++ b/rdoff/collectn.c
@@ -0,0 +1,40 @@
+/* collectn.c Implements variable length pointer arrays [collections]
+ *
+ * This file is public domain.
+ */
+
+#include "collectn.h"
+#include <stdlib.h>
+
+/* Put a collection node into its empty state: all 32 slots NULL and
+ * no follow-on node. */
+void collection_init(Collection * c)
+{
+    int slot = 32;
+
+    while (slot-- > 0)
+        c->p[slot] = NULL;
+    c->next = NULL;
+}
+
+/* colln() returns the address of slot `index' in the collection, so
+ * the caller can read or assign the pointer stored there. Nodes hold
+ * 32 slots each; further nodes are allocated (and initialised empty)
+ * on demand as the index requires.
+ *
+ * Returns NULL if `index' is negative or if a new node cannot be
+ * allocated. Previously a negative index addressed memory before the
+ * slot array, and a failed malloc() was passed straight to
+ * collection_init().
+ */
+void ** colln(Collection * c, int index)
+{
+    if (index < 0)
+        return NULL;
+
+    while (index >= 32) {
+        index -= 32;
+        if (c->next == NULL) {
+            c->next = malloc(sizeof(Collection));
+            if (c->next == NULL)
+                return NULL;            /* out of memory */
+            collection_init(c->next);
+        }
+        c = c->next;
+    }
+    return &(c->p[index]);
+}
+
+/* Return a collection to its empty state: free every node chained
+ * after the head, then clear the head's link and its 32 slots. The
+ * head itself is not freed, and neither are the objects the slots
+ * pointed to. */
+void collection_reset(Collection *c)
+{
+    Collection *node = c->next;
+    int slot;
+
+    while (node) {
+        Collection *following = node->next;     /* read before freeing */
+        free(node);
+        node = following;
+    }
+
+    c->next = NULL;
+    for (slot = 0; slot < 32; slot++)
+        c->p[slot] = NULL;
+}
diff --git a/rdoff/collectn.h b/rdoff/collectn.h
new file mode 100644
index 0000000..b3f2d52
--- /dev/null
+++ b/rdoff/collectn.h
@@ -0,0 +1,22 @@
+/* collectn.h Header file for 'collection' abstract data type
+ *
+ * This file is public domain, and does not come under the NASM license.
+ * It, along with 'collectn.c' implements what is basically a variable
+ * length array (of pointers)
+ */
+
+#ifndef _COLLECTN_H
+#define _COLLECTN_H
+
+/* One node of a collection: 32 pointer slots plus a link to the node
+ * holding the next 32. */
+typedef struct tagCollection {
+ void *p[32]; /* array of pointers to objects */
+
+ struct tagCollection *next;
+} Collection;
+
+/* Set every slot of `c' to NULL and clear its link. */
+void collection_init(Collection * c);
+/* Return the address of slot `index', chaining on new nodes as the
+ * index requires. */
+void ** colln(Collection * c, int index);
+/* Free the nodes chained after `c' and clear `c' itself. */
+void collection_reset(Collection * c);
+
+#endif
+
diff --git a/rdoff/ldrdf.c b/rdoff/ldrdf.c
new file mode 100644
index 0000000..ce86b7e
--- /dev/null
+++ b/rdoff/ldrdf.c
@@ -0,0 +1,540 @@
+/* ldrdf.c RDOFF Object File linker/loader main program
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+/* TODO: Make the system skip a module (other than the first) if none
+ * of the other specified modules contain a reference to it.
+ * May require the system to make an extra pass of the modules to be
+ * loaded eliminating those that aren't required.
+ *
+ * Support libraries (.a files - requires a 'ranlib' type utility)
+ *
+ * -s option to strip resolved symbols from exports.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "nasm.h"
+#include "rdoff.h"
+#include "nasmlib.h"
+#include "symtab.h"
+#include "collectn.h"
+
+#define LDRDF_VERSION "0.11"
+
+/* global variables - those to set options: */
+
+int verbose = 0; /* reflects setting of command line switch */
+/* byte boundary each module's code and data are padded to; set by the
+ * -p switch */
+int align = 16;
+int errors = 0; /* set by functions to cause halt after current
+ stage of processing */
+
+/* the linked list of modules that must be loaded & linked */
+
+struct modulenode {
+ rdffile f; /* the file */
+ long coderel; /* module's code relocation factor */
+ long datarel; /* module's data relocation factor */
+ long bssrel; /* module's bss data reloc. factor */
+ void * header; /* header location, if loaded */
+ char * name; /* filename */
+ /* next module in command-line order, or NULL for the last one */
+ struct modulenode *next;
+};
+
+/* head and tail of the module list; both are maintained by loadmodule() */
+struct modulenode *modules = NULL,*lastmodule = NULL;
+
+void *symtab; /* The symbol table */
+
+rdf_headerbuf * newheader ; /* New header to be written to output */
+
+/* loadmodule - open the RDOFF file `filename', append a node for it
+ * to the module list, and work out where its code and data will sit
+ * in the combined segments: straight after the previous module's,
+ * rounded up to the next multiple of `align'. Exits the program if
+ * memory runs out or the file cannot be opened. */
+
+void loadmodule(char *filename)
+{
+    struct modulenode *node = malloc(sizeof(struct modulenode));
+    struct modulenode *prev;
+
+    /* link the new node in (even if NULL - that is checked next) */
+    if (modules) {
+        prev = lastmodule;
+        prev->next = node;
+    }
+    else {
+        prev = NULL;
+        modules = node;
+    }
+    lastmodule = node;
+
+    if (! node) {
+        fputs("ldrdf: not enough memory\n",stderr);
+        exit(1);
+    }
+
+    if (rdfopen(&node->f,filename)) {
+        rdfperror("ldrdf",filename);
+        exit(1);
+    }
+
+    node->header = NULL;        /* header hasn't been loaded */
+    node->name = filename;
+    node->next = NULL;
+
+    if (prev) {
+        long excess;
+
+        /* code follows the previous module's code, padded to `align' */
+        node->coderel = prev->coderel + prev->f.code_len;
+        excess = node->coderel % align;
+        if (excess != 0)
+            node->coderel += align - excess;
+
+        /* likewise for data */
+        node->datarel = prev->datarel + prev->f.data_len;
+        excess = node->datarel % align;
+        if (excess != 0)
+            node->datarel += align - excess;
+    }
+    else {
+        node->coderel = 0;
+        node->datarel = 0;
+    }
+
+    if (verbose)
+        printf("%s code = %08lx (+%04lx), data = %08lx (+%04lx)\n",filename,
+               node->coderel,node->f.code_len,
+               node->datarel,node->f.data_len);
+
+}
+
+/* load_segments() allocates memory for & loads the code & data segs
+ * from the RDF modules
+ *
+ * The combined text and data buffers are sized from the last
+ * module's relocation factor plus its segment length. Each module's
+ * header is read into its own buffer, its code and data are read in
+ * at the module's relocation factors, and the file is then closed.
+ *
+ * The buffers are zero-filled: the alignment padding between modules
+ * is never written by any module, yet write_output() writes out the
+ * whole buffer, so with plain malloc() uninitialised heap contents
+ * ended up in the output file. Every allocation asks for at least one
+ * byte, because malloc(0)/calloc(0) may legitimately return NULL and
+ * an empty segment must not be reported as "out of memory".
+ */
+
+char *text,*data;
+long textlength,datalength,bsslength;
+
+void load_segments(void)
+{
+    struct modulenode *mod;
+
+    if (!modules) {
+        fprintf(stderr,"ldrdf: nothing to do\n");
+        exit(0);
+    }
+    if (!lastmodule) {
+        fprintf(stderr,"ldrdf: panic: module list exists, but lastmodule=NULL\n");
+        exit(3);
+    }
+
+    if (verbose)
+        printf("loading modules into memory\n");
+
+    /* The following stops 16 bit DOS from crashing whilst attempting to
+       work using segments > 64K */
+    if (sizeof(int) == 2) { /* expect a 'code has no effect' warning on 32 bit
+                               platforms... */
+        if (lastmodule->coderel + lastmodule->f.code_len > 65535 ||
+            lastmodule->datarel + lastmodule->f.data_len > 65535) {
+            fprintf(stderr,"ldrdf: segment length has exceeded 64K; use a 32 bit "
+                    "version.\nldrdf: code size = %05lx, data size = %05lx\n",
+                    lastmodule->coderel + lastmodule->f.code_len,
+                    lastmodule->datarel + lastmodule->f.data_len);
+            exit(1);
+        }
+    }
+
+    textlength = lastmodule->coderel + lastmodule->f.code_len;
+    datalength = lastmodule->datarel + lastmodule->f.data_len;
+
+    /* zero-filled, and never a zero-byte request */
+    text = calloc(textlength ? textlength : 1, 1);
+    data = calloc(datalength ? datalength : 1, 1);
+
+    if (!text || !data) {
+        fprintf(stderr,"ldrdf: out of memory\n");
+        exit(1);
+    }
+
+    mod = modules;
+    while (mod) {               /* load the segments for each module */
+        mod->header = malloc(mod->f.header_len ? mod->f.header_len : 1);
+        if (!mod->header) {
+            fprintf(stderr,"ldrdf: out of memory\n");
+            exit(1);
+        }
+        if (rdfloadseg(&mod->f,RDOFF_HEADER,mod->header) ||
+            rdfloadseg(&mod->f,RDOFF_CODE,&text[mod->coderel]) ||
+            rdfloadseg(&mod->f,RDOFF_DATA,&data[mod->datarel])) {
+            rdfperror("ldrdf",mod->name);
+            exit(1);
+        }
+        rdfclose(&mod->f);      /* close file; segments remain */
+        mod = mod->next;
+    }
+}
+
+/* build_symbols() walks every module's header and enters each
+ * exported symbol into the global symbol table, with its offset
+ * rebased onto the module's code, data or BSS relocation factor.
+ * Along the way it adds up the BSS reservations, which fixes each
+ * module's bssrel and the overall bsslength. It also creates the new
+ * output header buffer.
+ */
+
+void build_symbols()
+{
+    struct modulenode *mod = modules;
+    rdfheaderrec *r;
+    symtabEnt e;
+    long bssloc = 0;            /* keep track of location of BSS symbols */
+    long cbBss;                 /* BSS bytes reserved by the current module */
+    long base;
+
+    if (verbose) printf("building global symbol table:\n");
+    newheader = rdfnewheader();
+
+    symtab = symtabNew();
+
+    while (mod)
+    {
+        mod->bssrel = bssloc;
+        cbBss = 0;
+        rdfheaderrewind(&mod->f);
+        while ((r = rdfgetheaderrec(&mod->f)))
+        {
+            switch (r->type)
+            {
+            case 5:             /* Allocate BSS */
+                cbBss += r->b.amount;
+                break;
+
+            case 3:             /* export record */
+                e.segment = r->e.segment;
+                if (e.segment == 0)
+                    base = mod->coderel;        /* 0 -> code */
+                else if (e.segment == 1)
+                    base = mod->datarel;        /* 1 -> data */
+                else
+                    base = mod->bssrel;         /* 2 -> bss */
+                e.offset = r->e.offset + base;
+                e.flags = 0;
+                e.name = malloc(strlen(r->e.label) + 1);
+                if (! e.name)
+                {
+                    fprintf(stderr,"ldrdf: out of memory\n");
+                    exit(1);
+                }
+                strcpy(e.name,r->e.label);
+                symtabInsert(symtab,&e);
+                break;
+
+            default:            /* ignore all other record types */
+                break;
+            }
+        }
+        bssloc += cbBss;
+        mod = mod->next;
+    }
+    if (verbose)
+    {
+        symtabDump(symtab,stdout);
+        printf("BSS length = %ld bytes\n\n",bssloc);
+    }
+    bsslength = bssloc;
+}
+
+/* ldrdf_add_le() adds `delta' to the `length'-byte little-endian
+ * integer stored at `p', wrapping modulo 2^(8*length). Working a
+ * byte at a time makes it independent of the host's alignment rules,
+ * byte order and sizeof(long).
+ */
+
+static void ldrdf_add_le(char *p, int length, long delta)
+{
+    unsigned long v = 0;
+    int k;
+
+    for (k = length - 1; k >= 0; k--)           /* fetch, high byte first */
+        v = (v << 8) | (unsigned char) p[k];
+    v += (unsigned long) delta;
+    for (k = 0; k < length; k++) {              /* store, low byte first */
+        p[k] = (char) (v & 0xFF);
+        v >>= 8;
+    }
+}
+
+/* link_segments() step through relocation records in each module's
+ * header, fixing up references.
+ *
+ * Record types handled:
+ *   1 relocation - patch the operand in the loaded text/data image;
+ *                  non-relative ones are also copied to the new header
+ *   2 import     - look the symbol up and remember it under the
+ *                  segment number the module refers to it by
+ *   3 export     - re-emit with the symbol's final offset
+ *   4 DLL        - copied to the new header unchanged
+ * Problems are reported on stderr and flagged in `errors'; linking
+ * continues so that later problems are reported too.
+ *
+ * Relocations are applied to the 1, 2 or 4 bytes the record names, as
+ * a little-endian quantity. The earlier pointer casts patched
+ * sizeof(long) bytes for a 4-byte relocation and relied on unaligned
+ * access. A relocation whose target lies outside the segment image is
+ * now rejected instead of being written through.
+ */
+
+void link_segments(void)
+{
+    struct modulenode *mod;
+    Collection imports;
+    symtabEnt *s;
+    long rel,relto = 0;         /* placate gcc */
+    long off,seglen;
+    char *seg;
+    rdfheaderrec *r;
+    int bRelative;
+
+    if (verbose) printf("linking segments\n");
+
+    collection_init(&imports);
+
+    for (mod = modules; mod; mod = mod->next) {
+        if (verbose >= 2) printf("* processing %s\n",mod->name);
+        rdfheaderrewind(&mod->f);
+        while((r = rdfgetheaderrec(&mod->f))) {
+            switch(r->type) {
+            case 1:             /* relocation record */
+                if (r->r.segment >= 64) {       /* Relative relocation; */
+                    bRelative = 1;      /* need to find location relative */
+                    r->r.segment -= 64; /* to start of this segment */
+                    relto = r->r.segment == 0 ? mod->coderel : mod->datarel;
+                }
+                else
+                    bRelative = 0;      /* non-relative - need to relocate
+                                         * at load time */
+
+                /* calculate absolute offset of reference, not rel to beginning of
+                   segment */
+                r->r.offset += r->r.segment == 0 ? mod->coderel : mod->datarel;
+
+                /* calculate the relocation factor to apply to the operand -
+                   the base address of one of this modules segments if referred
+                   segment is 0 - 2, or the address of an imported symbol
+                   otherwise. */
+
+                if (r->r.refseg == 0) rel = mod->coderel;
+                else if (r->r.refseg == 1) rel = mod->datarel;
+                else if (r->r.refseg == 2) rel = mod->bssrel;
+                else {          /* cross module link - find reference */
+                    s = *colln(&imports,r->r.refseg - 2);
+                    if (!s) {
+                        fprintf(stderr,"ldrdf: link to undefined segment %04x in"
+                                " %s:%d\n", r->r.refseg,mod->name,r->r.segment);
+                        errors = 1;
+                        break;
+                    }
+                    rel = s->offset;
+
+                    r->r.refseg = s->segment;   /* change referred segment,
+                                                   so that new header is
+                                                   correct */
+                }
+
+                if (bRelative)  /* Relative - subtract current segment start */
+                    rel -= relto;
+                else
+                {               /* Add new relocation header */
+                    rdfaddheader(newheader,r);
+                }
+
+                /* Work out which segment we're making changes to ... */
+                if (r->r.segment == 0) {
+                    seg = text;
+                    seglen = textlength;
+                }
+                else if (r->r.segment == 1) {
+                    seg = data;
+                    seglen = datalength;
+                }
+                else {
+                    fprintf(stderr,"ldrdf: relocation in unknown segment %d in "
+                            "%s\n", r->r.segment,mod->name);
+                    errors = 1;
+                    break;
+                }
+
+                /* Add the relocation factor to the datum specified: */
+
+                if (verbose >= 3)
+                    printf(" - relocating %d:%08lx by %08lx\n",r->r.segment,
+                           r->r.offset,rel);
+
+                switch(r->r.length) {
+                case 1:
+                case 2:
+                case 4:
+                    off = (long) r->r.offset;
+                    /* the whole operand must lie inside the image */
+                    if (off < 0 || off > seglen - (long) r->r.length) {
+                        fprintf(stderr,"ldrdf: relocation at %d:%08lx is "
+                                "outside the segment in %s\n",
+                                r->r.segment,r->r.offset,mod->name);
+                        errors = 1;
+                    }
+                    else
+                        ldrdf_add_le(seg + off,(int) r->r.length,rel);
+                    break;
+                }
+                break;
+
+            case 2:             /* import record */
+                s = symtabFind(symtab, r->i.label);
+                if (s == NULL) {
+                    /* Need to add support for dynamic linkage */
+                    fprintf(stderr,"ldrdf: undefined symbol %s in module %s\n",
+                            r->i.label,mod->name);
+                    errors = 1;
+                }
+                else
+                {
+                    *colln(&imports,r->i.segment - 2) = s;
+                    if (verbose >= 2)
+                        printf("imported %s as %04x\n", r->i.label, r->i.segment);
+                }
+                break;
+
+            case 3:             /* export; dump to output new version */
+                s = symtabFind(symtab, r->e.label);
+                if (! s) continue;      /* eh? probably doesn't matter... */
+
+                r->e.offset = s->offset;
+                rdfaddheader(newheader,r);
+                break;
+
+            case 4:             /* DLL record */
+                rdfaddheader(newheader,r);      /* copy straight to output */
+                break;
+            }
+        }
+        /* import numbering is per module: start afresh for the next one */
+        collection_reset(&imports);
+    }
+}
+
+/* ldrdf_put_le32() writes the low 32 bits of `value' to `fp' as four
+ * bytes, least significant first. Returns non-zero on success.
+ */
+
+static int ldrdf_put_le32(FILE *fp, long value)
+{
+    unsigned long v = (unsigned long) value;
+    unsigned char b[4];
+    int k;
+
+    for (k = 0; k < 4; k++) {
+        b[k] = (unsigned char) (v & 0xFF);
+        v >>= 8;
+    }
+    return fwrite(b,1,4,fp) == 4;
+}
+
+/* write_output() write linked program out to a file
+ *
+ * Layout written: the new header (with a BSS record appended if any
+ * BSS was reserved), then the text length and text image, then the
+ * data length and data image. Any failure is reported and ends the
+ * program with exit status 1.
+ *
+ * The two length fields are emitted as explicit 4-byte little-endian
+ * values rather than as the first four bytes of a host `long', which
+ * only produced the right bytes on little-endian hosts. fclose() is
+ * checked as well, since an error in flushing buffered data only
+ * shows up there.
+ */
+
+void write_output(char *filename)
+{
+    FILE * fp;
+    rdfheaderrec r;
+
+    fp = fopen(filename,"wb");
+    if (! fp)
+    {
+        fprintf(stderr,"ldrdf: could not open '%s' for writing\n",filename);
+        exit(1);
+    }
+
+
+    /* add BSS length count to header... */
+    if (bsslength)
+    {
+        r.type = 5;
+        r.b.amount = bsslength;
+        rdfaddheader(newheader,&r);
+    }
+
+    /* Write header */
+    rdfwriteheader(fp,newheader);
+    rdfdoneheader(newheader);
+    newheader = NULL;
+
+    /* Write text */
+    if (!ldrdf_put_le32(fp,textlength)
+        || fwrite(text,1,textlength,fp) != (size_t) textlength)
+    {
+        fprintf(stderr,"ldrdf: error writing %s\n",filename);
+        exit(1);
+    }
+
+    /* Write data */
+    if (!ldrdf_put_le32(fp,datalength) ||
+        fwrite(data,1,datalength,fp) != (size_t) datalength)
+    {
+        fprintf (stderr,"ldrdf: error writing %s\n", filename);
+        exit(1);
+    }
+
+    /* a failed flush of buffered output is only reported here */
+    if (fclose(fp) != 0)
+    {
+        fprintf (stderr,"ldrdf: error writing %s\n", filename);
+        exit(1);
+    }
+}
+
+
+/* main program: interpret command line, and pass parameters on to
+ * individual module loaders & the linker
+ *
+ * Command line format:
+ * ldrdf [-o outfile | -x] [-a xxxx] [-v] [-p x] [--] infile [infile ...]
+ *
+ * Default action is to output a file named 'aout.rdx'. -x specifies
+ * that the linked object program should be executed, rather than
+ * written to a file. -a specifies that the object program should
+ * be prelocated at address 'xxxx'. This option cannot be used
+ * in conjunction with -x. -p sets the byte boundary that segments
+ * are padded to. -r on its own displays version information.
+ */
+
+/* Help text printed by usage(). (The -x line used to name the
+ * program as "ldrdx".) */
+const char *usagemsg = "usage:\n"
+" ldrdf [-o outfile | -x] [-a x] [-v] [-p x] [--] infile [infile ...]\n\n"
+" ldrdf -h displays this message\n"
+" ldrdf -r displays version information\n\n"
+" -o selects output filename (default is aout.rdx)\n"
+" -x causes ldrdf to link & execute rather than write to file\n"
+" -a x causes object program to be statically relocated to address 'x'\n"
+" -v turns on verbose mode\n"
+" -p x causes segments to be aligned (padded) to x byte boundaries\n"
+" (default is 16 bytes)\n";
+
+/* Print the help text on stderr. */
+void usage(void)
+{
+    fputs(usagemsg,stderr);
+}
+
+/* ldrdf_need_arg() stops the program with a message unless the
+ * switch `sw' is followed by another command line word. `remaining'
+ * is the number of words left, counting the switch itself.
+ */
+
+static void ldrdf_need_arg(int remaining, const char *sw)
+{
+    if (remaining < 2) {
+        fprintf(stderr,"ldrdf: switch '%s' requires a parameter\n",sw);
+        exit(1);
+    }
+}
+
+/* main() reads the command line, registering each named module with
+ * loadmodule() as it goes, then runs the stages in order:
+ * load_segments(), build_symbols(), link_segments() and finally
+ * write_output(). Returns 0 on success; every failure path ends in
+ * exit() with a non-zero status.
+ *
+ * -o, -a and -p each consume the following word. They are now checked
+ * to have one: as the last word on the line they used to read past
+ * the end of argv and leave the loop counter negative.
+ */
+
+int main(int argc,char **argv)
+{
+    char *ofilename = "aout.rdx";
+    long relocateaddr = -1;     /* -1 if no relocation is to occur */
+    int execute = 0;            /* 1 to execute after linking, 0 otherwise */
+    int procsw = 1;             /* set to 0 by '--' */
+    int tmp;
+
+    if (argc == 1) {
+        usage();
+        exit(1);
+    }
+
+    /* process command line switches, and add modules specified to linked list
+       of modules, keeping track of total memory required to load them */
+
+    while(argv++,--argc) {
+        if (procsw && !strcmp(*argv,"-h")) {    /* Help command */
+            usage(); exit(1);
+        }
+        else if (procsw && !strcmp(*argv,"-r")) {
+            printf("ldrdf version %s (%s) (%s)\n",LDRDF_VERSION,_RDOFF_H,
+                   sizeof(int) == 2 ? "16 bit" : "32 bit");
+            exit(1);
+        }
+        else if (procsw && !strcmp(*argv,"-o")) {
+            ldrdf_need_arg(argc,"-o");
+            ofilename = *++argv;
+            --argc;
+            if (execute) {
+                fprintf(stderr,"ldrdf: -o and -x switches incompatible\n");
+                exit(1);
+            }
+            if (verbose > 1) printf("output filename set to '%s'\n",ofilename);
+        }
+        else if (procsw && !strcmp(*argv,"-x")) {
+            execute++;
+            if (verbose > 1) printf("will execute linked object\n");
+        }
+        else if (procsw && !strcmp(*argv,"-a")) {
+            ldrdf_need_arg(argc,"-a");
+            relocateaddr = readnum(*++argv,&tmp);
+            --argc;
+            if (tmp) {
+                fprintf(stderr,"ldrdf: error in parameter to '-a' switch: '%s'\n",
+                        *argv);
+                exit(1);
+            }
+            if (execute) {
+                fprintf(stderr,"ldrdf: -a and -x switches incompatible\n");
+                exit(1);
+            }
+            if (verbose) printf("will relocate to %08lx\n",relocateaddr);
+        }
+        else if (procsw && !strcmp(*argv,"-v")) {
+            verbose++;
+            if (verbose == 1) printf("verbose mode selected\n");
+        }
+        else if (procsw && !strcmp(*argv,"-p")) {
+            ldrdf_need_arg(argc,"-p");
+            align = readnum(*++argv,&tmp);
+            --argc;
+            if (tmp) {
+                fprintf(stderr,"ldrdf: error in parameter to '-p' switch: '%s'\n",
+                        *argv);
+                exit(1);
+            }
+            if (align != 1 && align != 2 && align != 4 && align != 8 && align != 16
+                && align != 32 && align != 256) {
+                /* the message now lists 32, which the test above accepts */
+                fprintf(stderr,"ldrdf: %d is an invalid alignment factor - must be"
+                        " 1, 2, 4, 8, 16, 32 or 256\n",align);
+                exit(1);
+            }
+            if (verbose > 1) printf("alignment %d selected\n",align);
+        }
+        else if (procsw && !strcmp(*argv,"--")) {
+            procsw = 0;
+        }
+        else {                  /* is a filename */
+            if (verbose > 1) printf("processing module %s\n",*argv);
+            loadmodule(*argv);
+        }
+    }
+
+    /* we should be scanning for unresolved references, and removing
+       unreferenced modules from the list of modules here, so that
+       we know about the final size once libraries have been linked in */
+
+    load_segments();    /* having calculated size of reqd segments, load
+                           each rdoff module's segments into memory */
+
+    build_symbols();    /* build a global symbol table...
+                           perhaps this should be done before load_segs? */
+
+    link_segments();    /* step through each module's header, and resolve
+                           references to the global symbol table.
+                           This also does local address fixups. */
+
+    if (errors) {
+        fprintf(stderr,"ldrdf: there were errors - aborted\n");
+        exit(errors);
+    }
+    if (execute) {
+        fprintf(stderr,"ldrdf: module execution not yet supported\n");
+        exit(1);
+    }
+    if (relocateaddr != -1) {
+        fprintf(stderr,"ldrdf: static relocation not yet supported\n");
+        exit(1);
+    }
+
+    write_output(ofilename);
+    return 0;
+}
diff --git a/rdoff/rdf.doc b/rdoff/rdf.doc
new file mode 100644
index 0000000..300c2bc
--- /dev/null
+++ b/rdoff/rdf.doc
@@ -0,0 +1,99 @@
+RDOFF: Relocatable Dynamically-linked Object File Format
+========================================================
+
+RDOFF was designed initially to test the object-file production
+interface to NASM. It soon became apparent that it could be enhanced
+for use in serious applications due to its simplicity; code to load
+and execute an RDOFF object module is very simple. It also contains
+enhancements to allow it to be linked with a dynamic link library at
+either run- or load- time, depending on how complex you wish to make
+your loader.
+
+The RDOFF format (version 1.1, as produced by NASM v0.91) is defined
+as follows:
+
+The first six bytes of the file contain the string 'RDOFF1'. Other
+versions of the format may end with a final character other than
+'1' - all little endian versions of the file will always contain an
+ASCII character with value greater than 32. If RDOFF is used on a
+big-endian machine at some point in the future, the version will be
+encoded in decimal rather than ASCII, so will be below 32.
+
+All multi-byte fields following this are encoded in either little- or
+big-endian format depending on the system described by this version
+information. Object files should be encoded in the endianness of
+their target machine; files of incorrect endianness will be rejected
+by the loader - this means that loaders do not need to convert
+endianness, as RDOFF has been designed with simplicity of loading at
+the forefront of the design requirements.
+
+The next 4 byte field is the length of the header in bytes. The
+header consists of a sequence of variable length records. Each
+record's type is identified by the first byte of the record. Record
+types 1-4 are currently supported. Record type 5 will be added in
+the near future, when I implement BSS segments. Record type 6 may be
+to do with debugging, when I get debugging implemented.
+
+Type 1: Relocation
+==================
+
+Offset Length Description
+0 1 Type (contains 1)
+1 1 Segment that contains reference (0 = text, 1 = data)
+ Add 64 to this number to indicate a relative linkage
+ to an external symbol (see notes)
+2 4 Offset of reference
+6 1 Length of reference (1,2 or 4 bytes)
+7 2 Segment to which reference is made (0 = text, 1 =
+ data, 2 = BSS [when implemented]) others are external
+ symbols.
+
+Total length = 9 bytes
+
+Type 2: Symbol Import
+=====================
+
+0 1 Type (2)
+1 2 Segment number that will be used in references to this
+ symbol.
+3 ? Null terminated string containing label (up to 32
+ chars) to match against exports in linkage.
+
+Type 3: Symbol Export
+=====================
+
+0 1 Type (3)
+1 1 Segment containing object to be exported (0/1/2)
+2 4 Offset within segment
+6 ? Null terminated string containing label to export (32
+ char maximum length)
+
+Type 4: Dynamic Link Library
+============================
+
+0 1 Type (4)
+1 ? Library name (up to 128 chars)
+
+Type 5: Reserve BSS
+===================
+
+0 1 Type (5)
+1 4 Amount of BSS space to reserve in bytes
+
+Total length: 5 bytes
+
+-----------------------------------------------------------------------------
+
+Following the header is the text (code) segment. This is preceded by
+a 4-byte integer, which is its length in bytes. This is followed by
+the length of the data segment (also 4 bytes), and finally the data
+segment.
+
+Notes
+=====
+
+Relative linking: The number stored at the address is the offset
+required from the imported symbol, with the address of the end of
+the instruction subtracted from it. This means that the linker can
+simply add the address of the label relative to the beginning of the
+current segment to it.
diff --git a/rdoff/rdfdump.c b/rdoff/rdfdump.c
new file mode 100644
index 0000000..4d4f4df
--- /dev/null
+++ b/rdoff/rdfdump.c
@@ -0,0 +1,156 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+FILE *infile;
+
+/* Reinterpret the first four bytes of IN, as laid out in memory, as a
+ * little-endian integer and return it in the host's representation. */
+long translatelong(long in) {
+    const unsigned char *bytes = (const unsigned char *)&in;
+    long value = 0;
+    int k;
+
+    /* fold the bytes in from most significant (index 3) to least (index 0) */
+    for (k = 3; k >= 0; k--)
+        value = (value << 8) + bytes[k];
+
+    return value;
+}
+
+/* Reinterpret the two bytes of IN, as laid out in memory, as a little-endian
+ * 16-bit quantity; the result is always in the range 0..65535. */
+int translateshort(short in) {
+    const unsigned char *bytes = (const unsigned char *)&in;
+    int high = bytes[1];
+    int low = bytes[0];
+
+    return (high << 8) + low;
+}
+/* Read a NUL-terminated string from infile into buf (capacity `size` bytes).
+ * The stored copy is truncated if it does not fit, but the whole string is
+ * still consumed from the file.  Returns the number of bytes consumed,
+ * including the terminator, or -1 if end of file is hit first. */
+static long read_string(unsigned char *buf, int size) {
+    long used = 0;
+    int stored = 0, c;
+
+    while ((c = getc(infile)) != EOF) {
+        used++;
+        if (c == 0) {
+            buf[stored] = 0;
+            return used;
+        }
+        if (stored < size - 1)
+            buf[stored++] = (unsigned char) c;
+    }
+    buf[stored] = 0;
+    return -1;
+}
+
+/* Print every record of an RDOFF header to stdout.
+ *
+ * length: size of the header in bytes; records are read from the global
+ *         `infile` until that many bytes have been accounted for.
+ *
+ * Unknown record types are reported and skipped one byte at a time.  If the
+ * file ends in the middle of a record the program reports it and exits. */
+void print_header(long length) {
+    unsigned char buf[129],t,s,l;
+    long o, n;
+    short rs;
+
+    while (length > 0) {
+        if (fread(&t,1,1,infile) != 1)
+            goto truncated;
+        switch(t) {
+        case 1: /* relocation record */
+            o = 0; /* only 4 bytes are read; keep the rest defined */
+            if (fread(&s,1,1,infile) != 1 || fread(&o,4,1,infile) != 1 ||
+                fread(&l,1,1,infile) != 1 || fread(&rs,2,1,infile) != 1)
+                goto truncated;
+            printf(" relocation: location (%04x:%08lx), length %d, "
+                   "referred seg %04x\n",(int)s,translatelong(o),(int)l,
+                   translateshort(rs));
+            length -= 9;
+            break;
+        case 2: /* import record */
+            if (fread(&rs,2,1,infile) != 1 ||
+                (n = read_string(buf,(int)sizeof buf)) < 0)
+                goto truncated;
+            printf(" import: segment %04x = %s\n",translateshort(rs),buf);
+            length -= n + 3;
+            break;
+        case 3: /* export record */
+            o = 0;
+            if (fread(&s,1,1,infile) != 1 || fread(&o,4,1,infile) != 1 ||
+                (n = read_string(buf,(int)sizeof buf)) < 0)
+                goto truncated;
+            printf(" export: (%04x:%08lx) = %s\n",(int)s,translatelong(o),buf);
+            length -= n + 6;
+            break;
+        case 4: /* DLL record */
+            if ((n = read_string(buf,(int)sizeof buf)) < 0)
+                goto truncated;
+            printf(" dll: %s\n",buf);
+            length -= n + 1;
+            break;
+        case 5: /* BSS reservation */
+            /* the amount is a 4-byte field: it must be read into a long,
+               not into the one-byte scratch variable `l` */
+            o = 0;
+            if (fread(&o,4,1,infile) != 1)
+                goto truncated;
+            printf(" bss reservation: %08lx bytes\n",translatelong(o));
+            length -= 5;
+            break;
+        default:
+            printf(" unrecognised record (type %d)\n",(int)t);
+            length --;
+        }
+    }
+    return;
+
+truncated:
+    fputs("rdfdump: unexpected end of file in header\n",stderr);
+    exit(1);
+}
+
+/* Read a 4-byte length field from infile and return it translated to the
+ * host representation.  `what` names the field for the error message; the
+ * program exits if the file ends early. */
+static long read_length(const char *what) {
+    long raw = 0; /* only 4 bytes are read; keep the rest defined */
+
+    if (fread(&raw,4,1,infile) != 1) {
+        fprintf(stderr,"rdfdump: unexpected end of file reading %s length\n",
+                what);
+        exit(1);
+    }
+    return translatelong(raw);
+}
+
+/* Consume `count` bytes from infile, printing each as two hex digits when
+ * `show` is non-zero.  Exits if the file ends before `count` bytes. */
+static void dump_bytes(long count, int show) {
+    int c;
+
+    while (count-- > 0) {
+        if ((c = getc(infile)) == EOF) {
+            fputs("rdfdump: unexpected end of file in segment\n",stderr);
+            exit(1);
+        }
+        if (show) printf(" %02x",c);
+    }
+}
+
+/* rdfdump [-v] <filename>
+ *
+ * Prints the header records and segment lengths of an RDOFF file; with -v
+ * the text and data segment bytes are dumped in hex as well.  Returns 0 on
+ * success and exits with status 1 on any usage, open or format error. */
+int main(int argc,char **argv) {
+    char id[7];
+    long l;
+    int verbose = 0;
+
+    puts("RDOFF Dump utility v1.1 (C) Copyright 1996 Julian R Hall");
+
+    if (argc < 2) {
+        fputs("Usage: rdfdump [-v] <filename>\n",stderr);
+        exit(1);
+    }
+
+    if (! strcmp (argv[1], "-v") )
+    {
+        verbose = 1;
+        if (argc < 3)
+        {
+            fputs("required parameter missing\n",stderr);
+            exit(1);
+        }
+        argv++;
+    }
+
+    infile = fopen(argv[1],"rb");
+    if (! infile) {
+        fprintf(stderr,"rdfdump: Could not open %s\n",argv[1]);
+        exit(1);
+    }
+
+    /* a file too short to hold the signature is not an RDOFF file either */
+    if (fread(id,6,1,infile) != 1 || strncmp(id,"RDOFF",5)) {
+        fputs("rdfdump: File does not contain valid RDOFF header\n",stderr);
+        exit(1);
+    }
+
+    printf("File %s: RDOFF version %c\n\n",argv[1],id[5]);
+    if (id[5] != '1') { /* only version 1 is understood */
+        fprintf(stderr,"rdfdump: unknown RDOFF version '%c'\n",id[5]);
+        exit(1);
+    }
+
+    l = read_length("header");
+    printf("Header (%ld bytes):\n",l);
+    print_header(l);
+
+    l = read_length("text segment");
+    printf("\nText segment length = %ld bytes\n",l);
+    dump_bytes(l,verbose); /* always consumed, to reach the data length */
+    if (verbose) printf("\n\n");
+
+    l = read_length("data segment");
+    printf("Data segment length = %ld bytes\n",l);
+
+    if (verbose)
+    {
+        dump_bytes(l,1);
+        printf("\n");
+    }
+    fclose(infile);
+    return 0;
+}
diff --git a/rdoff/rdfload.c b/rdoff/rdfload.c
new file mode 100644
index 0000000..ad340b3
--- /dev/null
+++ b/rdoff/rdfload.c
@@ -0,0 +1,173 @@
+/* rdfload.c RDOFF Object File loader library
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * Permission to use this file in your own projects is granted, as long
+ * as acknowledgement is given in an appropriate manner to its authors,
+ * with instructions of how to obtain a copy via ftp.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "rdfload.h"
+#include "symtab.h"
+#include "rdoff.h"
+#include "collectn.h"
+
+extern int rdf_errno;
+
+/* Load an RDOFF object file into memory.
+ *
+ * filename: path of the RDOFF file to load.
+ *
+ * Returns a newly allocated module holding the text and data segments, a
+ * zero-filled BSS segment sized from the header's type-5 records, and the
+ * header itself (kept reachable through f->f for rdfgetheaderrec).  The
+ * relocation bases are initialised to the addresses of the three segments.
+ * On failure returns NULL with rdf_errno set (6 = out of memory, 2 = invalid
+ * lengths in the file, otherwise whatever rdfopen/rdfloadseg reported). */
+rdfmodule * rdfload(const char *filename)
+{
+    rdfmodule * f = malloc(sizeof(rdfmodule));
+    long bsslength = 0;
+    char * hdr = NULL;
+    rdfheaderrec *r;
+
+    if (f == NULL)
+    {
+        rdf_errno = 6; /* out of memory */
+        return NULL;
+    }
+    f->t = f->d = f->b = NULL; /* lets the cleanup path free unconditionally */
+
+    f->symtab = symtabNew();
+    if (!f->symtab)
+    {
+        free(f);
+        rdf_errno = 6;
+        return NULL;
+    }
+
+    /* open the file */
+    if ( rdfopen( &(f->f), filename ) )
+        goto fail; /* rdf_errno already set by rdfopen */
+
+    /* a negative length would turn into a huge read request below */
+    if (f->f.header_len < 0 || f->f.code_len < 0 || f->f.data_len < 0)
+    {
+        rdf_errno = 2;
+        goto fail_open;
+    }
+
+    /* read in text and data segments, and header; request at least one
+       byte so an empty segment is not mistaken for an allocation failure */
+    f->t = malloc (f->f.code_len ? f->f.code_len : 1);
+    f->d = malloc (f->f.data_len ? f->f.data_len : 1); /* BSS allocated later */
+    hdr = malloc (f->f.header_len ? f->f.header_len : 1);
+
+    if (! f->t || ! f->d || !hdr) {
+        rdf_errno = 6;
+        goto fail_open;
+    }
+
+    if ( rdfloadseg (&f->f,RDOFF_HEADER,hdr) ||
+         rdfloadseg (&f->f,RDOFF_CODE,f->t) ||
+         rdfloadseg (&f->f,RDOFF_DATA,f->d) )
+        goto fail_open; /* rdf_errno already set by rdfloadseg */
+
+    rdfclose(&f->f);
+
+    /* Allocate BSS segment; step through header and count BSS records.
+       The header is now in memory, so the closed file is not touched. */
+    while ( ( r = rdfgetheaderrec (&f->f) ) )
+    {
+        if (r->type == 5)
+            bsslength += r->b.amount;
+    }
+
+    /* BSS must start out zeroed, hence calloc rather than malloc */
+    f->b = calloc ( bsslength > 0 ? bsslength : 1, 1 );
+    if (! f->b )
+    {
+        rdf_errno = 6;
+        goto fail;
+    }
+
+    rdfheaderrewind (&f->f);
+
+    f->textrel = (long)f->t;
+    f->datarel = (long)f->d;
+    f->bssrel = (long)f->b;
+
+    return f;
+
+fail_open:
+    rdfclose(&f->f);
+fail:
+    free(f->t);
+    free(f->d);
+    free(hdr);
+    symtabDone(f->symtab);
+    free(f);
+    return NULL;
+}
+
+/* Apply the module's relocation records to its loaded segments and enter
+ * its exported symbols into m->symtab.
+ *
+ * Returns 0 on success.  Returns 1 if a relocation refers to a segment
+ * other than text/data/bss (load-time linkage is not supported), if the
+ * location to patch is not in the text or data segment, or if memory for
+ * a symbol name cannot be allocated. */
+int rdf_relocate(rdfmodule * m)
+{
+    rdfheaderrec * rec;
+    Collection imports;
+    symtabEnt sym;
+    long delta;
+    unsigned char * base;
+
+    rdfheaderrewind ( & m->f );
+    collection_init(&imports);
+
+    for (rec = rdfgetheaderrec ( & m->f ); rec; rec = rdfgetheaderrec ( & m->f ))
+    {
+        if (rec->type == 1) /* Relocation record */
+        {
+            /* relocation factor: load address of the segment referred to */
+            switch (rec->r.refseg) {
+            case 0: delta = m->textrel; break;
+            case 1: delta = m->datarel; break;
+            case 2: delta = m->bssrel; break;
+            default:
+                /* external segment: would need load-time linkage */
+                return 1;
+            }
+
+            /* segment that contains the bytes to patch */
+            switch (rec->r.segment & RDOFF_SEGMENTMASK) {
+            case 0: base = m->t; break;
+            case 1: base = m->d; break;
+            default:
+                return 1;
+            }
+
+            /* non-portable in-place patching, acceptable because executing
+               a module loaded this way is itself non-portable */
+            if (rec->r.length == 1)
+                base[rec->r.offset] += (char) delta;
+            else if (rec->r.length == 2)
+                *(int16 *)(base + rec->r.offset) += (int16) delta;
+            else if (rec->r.length == 4)
+                *(long *)(base + rec->r.offset) += delta;
+        }
+        else if (rec->type == 3) /* export record - add to symtab */
+        {
+            long segbase;
+
+            sym.segment = rec->e.segment;
+            if (sym.segment == 0)
+                segbase = m->textrel; /* 0 -> code */
+            else if (sym.segment == 1)
+                segbase = m->datarel; /* 1 -> data */
+            else
+                segbase = m->bssrel; /* 2 -> bss */
+            sym.offset = rec->e.offset + segbase;
+            sym.flags = 0;
+
+            /* the record lives in static storage, so keep a private copy */
+            sym.name = malloc(strlen(rec->e.label) + 1);
+            if (! sym.name)
+                return 1;
+            strcpy(sym.name,rec->e.label);
+
+            symtabInsert(m->symtab,&sym);
+        }
+    }
+    return 0;
+}
diff --git a/rdoff/rdfload.h b/rdoff/rdfload.h
new file mode 100644
index 0000000..5e264b9
--- /dev/null
+++ b/rdoff/rdfload.h
@@ -0,0 +1,29 @@
+/* rdfload.h RDOFF Object File loader library header file
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * See the file 'rdfload.c' for special license information for this
+ * file.
+ */
+
+#ifndef _RDFLOAD_H
+#define _RDFLOAD_H
+
+#include "rdoff.h"
+
+/* An RDOFF module loaded into memory, as returned by rdfload(). */
+typedef struct RDFModuleStruct {
+ rdffile f; /* file structure */
+ unsigned char * t, * d, * b; /* text, data, and bss segments */
+ long textrel; /* relocation base added to references into text */
+ long datarel; /* relocation base added to references into data */
+ long bssrel; /* relocation base added to references into bss */
+ void * symtab; /* symbol table handle from symtabNew() */
+} rdfmodule;
+
+rdfmodule * rdfload(const char * filename);
+int rdf_relocate(rdfmodule * m);
+
+#endif
diff --git a/rdoff/rdoff.c b/rdoff/rdoff.c
new file mode 100644
index 0000000..9a969ad
--- /dev/null
+++ b/rdoff/rdoff.c
@@ -0,0 +1,367 @@
+/* rdoff.c library of routines for manipulating rdoff files
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+/* TODO: The functions in this module assume they are running
+ * on a little-endian machine. This should be fixed to
+ * make it portable.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rdoff.h"
+
+/* ========================================================================
+ * Code for memory buffers (for delayed writing of header until we know
+ * how long it is).
+ * ======================================================================== */
+
+
+/* Allocate an empty memory buffer block with no successor.
+ * Returns NULL if the allocation fails. */
+memorybuffer * newmembuf(){
+    memorybuffer * t;
+
+    t = malloc(sizeof(memorybuffer));
+    if (t == NULL)
+        return NULL;
+
+    t->length = 0;
+    t->next = NULL;
+    return t;
+}
+
+/* Append data to the end of a chain of memory buffers.
+ *
+ * b:     first block of the chain.
+ * data:  source of the bytes to append.
+ * bytes: a positive count copies that many raw bytes; -2 and -4 treat
+ *        *data as an int16 or a long respectively and store it in
+ *        little-endian byte order.  Other negative values write nothing.
+ *
+ * When the last block cannot hold the write, a new block is chained on and
+ * the data goes there instead.  If that allocation fails the data is
+ * dropped and rdf_errno is set to 6 (out of memory). */
+void membufwrite(memorybuffer *b, void *data, int bytes) {
+    extern int rdf_errno;
+    const unsigned char *src = data;
+    int16 w;
+    long l;
+
+    if (b->next) { /* memory buffer full - use next buffer */
+        membufwrite(b->next,data,bytes);
+        return;
+    }
+    if ((bytes < 0 && b->length - bytes > BUF_BLOCK_LEN)
+        || (bytes > 0 && b->length + bytes > BUF_BLOCK_LEN)) {
+
+        /* buffer full and no next allocated... allocate and initialise next
+         * buffer */
+
+        b->next = newmembuf();
+        if (!b->next) {
+            rdf_errno = 6;
+            return;
+        }
+        membufwrite(b->next,data,bytes);
+        return; /* must not also write into this, already full, block */
+    }
+
+    switch(bytes) {
+    case -4: /* convert to little-endian */
+        l = * (long *) data ;
+        b->buffer[b->length++] = l & 0xFF;
+        l >>= 8 ;
+        b->buffer[b->length++] = l & 0xFF;
+        l >>= 8 ;
+        b->buffer[b->length++] = l & 0xFF;
+        l >>= 8 ;
+        b->buffer[b->length++] = l & 0xFF;
+        break;
+
+    case -2:
+        w = * (int16 *) data ;
+        b->buffer[b->length++] = w & 0xFF;
+        w >>= 8 ;
+        b->buffer[b->length++] = w & 0xFF;
+        break;
+
+    default:
+        /* counting a negative value down would never reach zero */
+        while (bytes-- > 0)
+            b->buffer[b->length++] = *src++;
+        break;
+    }
+}
+
+/* Write the contents of every block in the chain, in order, to fp. */
+void membufdump(memorybuffer *b,FILE *fp)
+{
+    memorybuffer *blk;
+
+    for (blk = b; blk != NULL; blk = blk->next)
+        fwrite (blk->buffer, 1, blk->length, fp);
+}
+
+/* Return the total number of bytes stored across the whole chain. */
+int membuflength(memorybuffer *b)
+{
+    int total = 0;
+
+    while (b) {
+        total += b->length;
+        b = b->next;
+    }
+    return total;
+}
+
+/* Release every block of the chain starting at b (b may be NULL). */
+void freemembuf(memorybuffer *b)
+{
+    memorybuffer *following;
+
+    while (b != NULL) {
+        following = b->next; /* fetch the link before the block goes away */
+        free(b);
+        b = following;
+    }
+}
+
+/* =========================================================================
+ General purpose routines and variables used by the library functions
+ ========================================================================= */
+
+/* Interpret the first four bytes of IN's in-memory representation as a
+ * little-endian integer and return its value on this machine. */
+long translatelong(long in) {
+    const unsigned char *p = (const unsigned char *)&in;
+    long acc = p[3];
+
+    acc = (acc << 8) + p[2];
+    acc = (acc << 8) + p[1];
+    return (acc << 8) + p[0];
+}
+
+const char *RDOFFId = "RDOFF1"; /* written to the start of RDOFF files */
+
+/* Messages for the error codes stored in rdf_errno, indexed by code;
+   rdfperror() prints rdf_errors[rdf_errno]. */
+const char *rdf_errors[7] = {
+ "no error occurred","could not open file","invalid file format",
+ "error reading file","unknown error","header not read",
+ "out of memory"};
+
+/* Code of the most recent error reported by the library (0 = none). */
+int rdf_errno = 0;
+
+/* ========================================================================
+ The library functions
+ ======================================================================== */
+
+/* Read a 4-byte little-endian field from fp into *out.
+ * Returns 0 on success, non-zero if four bytes could not be read. */
+static int rdf_read_long(FILE *fp, long *out)
+{
+    unsigned char raw[4];
+    unsigned long v;
+
+    if (fread(raw,1,4,fp) != 4)
+        return 1;
+
+    v = (unsigned long)raw[0] | ((unsigned long)raw[1] << 8) |
+        ((unsigned long)raw[2] << 16) | ((unsigned long)raw[3] << 24);
+    *out = (long)v;
+    return 0;
+}
+
+/* Open an RDOFF file and record where its parts are.
+ *
+ * f:    structure to fill in; on success f->fp is open and rewound, the
+ *       header/code/data lengths and the code/data file offsets are set,
+ *       and f->header_loc is NULL (no header loaded yet).
+ * name: path of the file.
+ *
+ * Returns 0 on success, otherwise the error code also stored in rdf_errno:
+ * 1 = cannot open, 2 = not an RDOFF1 file or seek failure, 3 = read error.
+ * The file is closed again on every failure.  Exits with status 3 when run
+ * on a machine that is not little-endian. */
+int rdfopen(rdffile *f, const char *name)
+{
+    char buf[8];
+
+    if (translatelong(0x01020304) != 0x01020304)
+    { /* fix this to be portable! */
+        fputs("*** this program requires a little endian machine\n",stderr);
+        fprintf(stderr,"01020304h = %08lxh\n",translatelong(0x01020304));
+        exit(3);
+    }
+
+    f->fp = fopen(name,"rb");
+    if (!f->fp) return rdf_errno = 1; /* error 1: file open error */
+
+    /* read signature; a file too short to hold it is not RDOFF either */
+    if (fread(buf,1,6,f->fp) != 6) {
+        fclose(f->fp);
+        return rdf_errno = 2;
+    }
+    buf[6] = 0;
+
+    if (strcmp(buf,RDOFFId)) {
+        fclose(f->fp);
+        return rdf_errno = 2; /* error 2: invalid file format */
+    }
+
+    /* The length fields are decoded byte by byte rather than read straight
+       into the long members, which would leave part of each member unset
+       wherever long is wider than four bytes. */
+    if (rdf_read_long(f->fp,&f->header_len)) {
+        fclose(f->fp);
+        return rdf_errno = 3; /* error 3: file read error */
+    }
+
+    if (fseek(f->fp,f->header_len,SEEK_CUR)) {
+        fclose(f->fp);
+        return rdf_errno = 2; /* seek past end of file...? */
+    }
+
+    if (rdf_read_long(f->fp,&f->code_len)) {
+        fclose(f->fp);
+        return rdf_errno = 3;
+    }
+
+    f->code_ofs = ftell(f->fp);
+    if (fseek(f->fp,f->code_len,SEEK_CUR)) {
+        fclose(f->fp);
+        return rdf_errno = 2;
+    }
+
+    if (rdf_read_long(f->fp,&f->data_len)) {
+        fclose(f->fp);
+        return rdf_errno = 3;
+    }
+
+    f->data_ofs = ftell(f->fp);
+    rewind(f->fp);
+    f->header_loc = NULL;
+    return 0;
+}
+
+/* Close the file behind f.  Always returns 0; the result of fclose is not
+   examined.  Fields of f other than fp are left as they were. */
+int rdfclose(rdffile *f)
+{
+ fclose(f->fp);
+ return 0;
+}
+
+/* Print "app:name: message" on stderr for the current value of rdf_errno.
+ * A code outside the rdf_errors table is reported as "unknown error"
+ * instead of indexing past the end of the table. */
+void rdfperror(const char *app,const char *name)
+{
+    const char *msg = "unknown error";
+
+    if (rdf_errno >= 0 &&
+        rdf_errno < (int)(sizeof rdf_errors / sizeof rdf_errors[0]))
+        msg = rdf_errors[rdf_errno];
+
+    fprintf(stderr,"%s:%s: %s\n",app,name,msg);
+}
+
+/* Read one part of an open RDOFF file into caller-supplied memory.
+ *
+ * f:       file opened with rdfopen.
+ * segment: RDOFF_HEADER, RDOFF_CODE or RDOFF_DATA; any other value reads
+ *          nothing and succeeds.
+ * buffer:  destination, which must be large enough for the part requested.
+ *          For RDOFF_HEADER the pointer is also remembered in f so that
+ *          rdfgetheaderrec can walk the header later.
+ *
+ * Returns 0 on success, or 4 (seek failed) / 3 (short read), with the same
+ * value stored in rdf_errno. */
+int rdfloadseg(rdffile *f,int segment,void *buffer)
+{
+    long start = 0;
+    long nbytes = 0;
+
+    if (segment == RDOFF_HEADER) {
+        start = 10; /* header follows the 6-byte id and 4-byte length */
+        nbytes = f->header_len;
+        f->header_loc = (char *)buffer;
+        f->header_fp = 0;
+    } else if (segment == RDOFF_CODE) {
+        start = f->code_ofs;
+        nbytes = f->code_len;
+    } else if (segment == RDOFF_DATA) {
+        start = f->data_ofs;
+        nbytes = f->data_len;
+    }
+
+    if (fseek(f->fp,start,SEEK_SET)) {
+        rdf_errno = 4;
+        return 4;
+    }
+
+    if (fread(buffer,1,nbytes,f->fp) != nbytes) {
+        rdf_errno = 3;
+        return 3;
+    }
+
+    return 0;
+}
+
+/* Macros for reading integers from header in memory.
+ *
+ * They expect an rdffile pointer named `f` in scope (RS also needs an int
+ * named `i`) and advance f->header_fp past what they read.  header_loc is
+ * a plain char array, so each byte is converted to unsigned before being
+ * combined; otherwise bytes of 0x80 and above would be sign-extended and
+ * corrupt the assembled value. */
+
+#define RDF_HDR_BYTE(k) \
+ ((unsigned long)(unsigned char) f->header_loc[f->header_fp + (k)])
+
+#define RI8(v) v = f->header_loc[f->header_fp++]
+#define RI16(v) { v = (int16)(RDF_HDR_BYTE(0) | (RDF_HDR_BYTE(1) << 8)); \
+ f->header_fp += 2; }
+
+/* the field is a signed 32-bit quantity; rebuild its sign explicitly so
+   the result is the same whatever the width of long */
+#define RI32(v) { unsigned long u_ = RDF_HDR_BYTE(0) | \
+ (RDF_HDR_BYTE(1) << 8) | \
+ (RDF_HDR_BYTE(2) << 16) | \
+ (RDF_HDR_BYTE(3) << 24); \
+ v = (u_ & 0x80000000UL) ? \
+ -(long)(~u_ & 0x7FFFFFFFUL) - 1 : (long)u_; \
+ f->header_fp += 4; }
+
+/* read a zero-terminated string of at most `max` characters; when the name
+   has exactly `max` characters its terminator is still unread after the
+   loop, so it is consumed here to keep the next record aligned */
+#define RS(str,max) { for(i=0;i<max;i++){\
+ RI8(str[i]); if (!str[i]) break;} \
+ if (i == max && f->header_fp < f->header_len && \
+ !f->header_loc[f->header_fp]) f->header_fp++; \
+ str[i]=0; }
+
+/* Decode the next record of the in-memory header.
+ *
+ * Returns a pointer to static storage that the next call overwrites, or
+ * NULL when the end of the header has been reached.  NULL is also returned
+ * with rdf_errno = 5 if no header has been loaded, and with rdf_errno = 2
+ * if the record type is not one of 1..5. */
+rdfheaderrec *rdfgetheaderrec(rdffile *f)
+{
+    static rdfheaderrec r;
+    int i; /* index variable required by the RS() macro */
+
+    if (f->header_loc == NULL) {
+        rdf_errno = 5;
+        return NULL;
+    }
+
+    if (f->header_fp >= f->header_len)
+        return NULL;
+
+    RI8(r.type);
+
+    if (r.type == 1) { /* Relocation record */
+        RI8(r.r.segment);
+        RI32(r.r.offset);
+        RI8(r.r.length);
+        RI16(r.r.refseg);
+    } else if (r.type == 2) { /* Imported symbol record */
+        RI16(r.i.segment);
+        RS(r.i.label,32);
+    } else if (r.type == 3) { /* Exported symbol record */
+        RI8(r.e.segment);
+        RI32(r.e.offset);
+        RS(r.e.label,32);
+    } else if (r.type == 4) { /* DLL record */
+        RS(r.d.libname,127);
+    } else if (r.type == 5) { /* BSS reservation record */
+        RI32(r.b.amount);
+    } else {
+        rdf_errno = 2; /* invalid file */
+        return NULL;
+    }
+
+    return &r;
+}
+
+/* Reset the header read position so that the next rdfgetheaderrec call
+   returns the first record again. */
+void rdfheaderrewind(rdffile *f)
+{
+ f->header_fp = 0;
+}
+
+
+/* Create an empty header under construction; it is simply a fresh memory
+   buffer obtained from newmembuf(). */
+rdf_headerbuf * rdfnewheader(void)
+{
+ return newmembuf();
+}
+
+/* Append one record to a header under construction.
+ *
+ * h: header buffer from rdfnewheader.
+ * r: record to serialise; r->type selects the layout.  Multi-byte integer
+ *    fields are stored little-endian and strings are written up to and
+ *    including their terminating zero.
+ *
+ * Returns 0 on success.  For a type outside 1..5 nothing is written and
+ * 2 is returned (also stored in rdf_errno). */
+int rdfaddheader(rdf_headerbuf * h, rdfheaderrec * r)
+{
+    if (r->type < 1 || r->type > 5)
+        return (rdf_errno = 2);
+
+    /* every record starts with its type byte */
+    membufwrite(h,&r->type,1);
+
+    switch (r->type)
+    {
+    case 1: /* relocation: 9 bytes in total */
+        membufwrite(h,&r->r.segment,1);
+        membufwrite(h,&r->r.offset,-4);
+        membufwrite(h,&r->r.length,1);
+        membufwrite(h,&r->r.refseg,-2);
+        break;
+
+    case 2: /* import */
+        membufwrite(h,&r->i.segment,-2);
+        membufwrite(h,&r->i.label,strlen(r->i.label) + 1);
+        break;
+
+    case 3: /* export */
+        membufwrite(h,&r->e.segment,1);
+        membufwrite(h,&r->e.offset,-4);
+        membufwrite(h,&r->e.label,strlen(r->e.label) + 1);
+        break;
+
+    case 4: /* DLL */
+        membufwrite(h,&r->d.libname,strlen(r->d.libname) + 1);
+        break;
+
+    case 5: /* BSS */
+        membufwrite(h,&r->b.amount,-4);
+        break;
+    }
+    return 0;
+}
+
+/* Write a complete RDOFF header to fp: the "RDOFF1" signature, the header
+ * length as a 4-byte little-endian integer, then the accumulated records.
+ *
+ * Returns 0 on success.  If any write fails, returns 4 and stores it in
+ * rdf_errno (the library has no dedicated write-error code). */
+int rdfwriteheader(FILE * fp, rdf_headerbuf * h)
+{
+    unsigned long len = (unsigned long) membuflength (h);
+    unsigned char lenbytes[4];
+    size_t idlen = strlen(RDOFFId);
+    int k;
+
+    /* encode the length explicitly instead of dumping the memory of a long */
+    for (k = 0; k < 4; k++) {
+        lenbytes[k] = (unsigned char)(len & 0xFF);
+        len >>= 8;
+    }
+
+    if (fwrite (RDOFFId, 1, idlen, fp) != idlen ||
+        fwrite (lenbytes, 1, 4, fp) != 4)
+        return rdf_errno = 4;
+
+    membufdump(h, fp);
+
+    /* membufdump reports nothing, so consult the stream's error flag */
+    if (ferror(fp))
+        return rdf_errno = 4;
+
+    return 0;
+}
+
+/* Dispose of a header under construction by freeing its memory buffers. */
+void rdfdoneheader(rdf_headerbuf * h)
+{
+ freemembuf(h);
+}
diff --git a/rdoff/rdoff.h b/rdoff/rdoff.h
new file mode 100644
index 0000000..b022400
--- /dev/null
+++ b/rdoff/rdoff.h
@@ -0,0 +1,112 @@
+/* rdoff.h RDOFF Object File manipulation routines header file
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef _RDOFF_H
+#define _RDOFF_H "RDOFF1 support routines v0.1"
+
+typedef short int16; /* not sure if this will be required to be altered
+ at all... best to typedef it just in case */
+
+/* the records that can be found in the RDOFF header */
+
+/* Type 1: relocation.  Serialised by rdfaddheader as 9 bytes. */
+struct RelocRec {
+ char type; /* must be 1 */
+ char segment; /* only 0 for code, or 1 for data supported,
+ but add 64 for relative refs (ie do not require
+ reloc @ loadtime, only linkage) */
+ long offset; /* from start of segment in which reference is loc'd */
+ char length; /* 1 2 or 4 bytes */
+ int16 refseg; /* segment to which reference refers to */
+};
+
+/* Type 2: imported symbol. */
+struct ImportRec {
+ char type; /* must be 2 */
+ int16 segment; /* segment number allocated to the label for reloc
+ records - label is assumed to be at offset zero
+ in this segment, so linker must fix up with offset
+ of segment and of offset within segment */
+ char label[33]; /* zero terminated... should be written to file until
+ the zero, but not after it - max len = 32 chars */
+};
+
+/* Type 3: exported symbol. */
+struct ExportRec {
+ char type; /* must be 3 */
+ char segment; /* segment referred to (0 = text, 1 = data; rdfload.c
+ also treats 2 as bss) */
+ long offset; /* offset within segment */
+ char label[33]; /* zero terminated as above. max len = 32 chars */
+};
+
+/* Type 4: dynamic link library. */
+struct DLLRec {
+ char type; /* must be 4 */
+ char libname[128]; /* name of library to link with at load time */
+};
+
+/* Type 5: BSS reservation.  Serialised by rdfaddheader as 5 bytes. */
+struct BSSRec {
+ char type; /* must be 5 */
+ long amount; /* number of bytes BSS to reserve */
+};
+
+/* Any header record.  Every member starts with the type byte, so `type`
+   can be examined first to decide which member is valid. */
+typedef union RDFHeaderRec {
+ char type; /* invariant throughout all below */
+ struct RelocRec r; /* type == 1 */
+ struct ImportRec i; /* type == 2 */
+ struct ExportRec e; /* type == 3 */
+ struct DLLRec d; /* type == 4 */
+ struct BSSRec b; /* type == 5 */
+} rdfheaderrec;
+
+/* State of an RDOFF file opened with rdfopen(). */
+typedef struct RDFFileInfo {
+ FILE *fp; /* stdio stream; must be open to use this struct */
+ int rdoff_ver; /* should be 1; any higher => not guaranteed to work */
+ long header_len; /* length of the header in bytes */
+ long code_len; /* length of the text segment in bytes */
+ long data_len; /* length of the data segment in bytes */
+ long code_ofs; /* file offset of the first byte of the text segment */
+ long data_ofs; /* file offset of the first byte of the data segment */
+ char *header_loc; /* keep location of header; NULL until it is loaded
+ with rdfloadseg(f,RDOFF_HEADER,...) */
+ long header_fp; /* current location within header for reading */
+} rdffile;
+
+#define BUF_BLOCK_LEN 4088 /* selected to match page size (4096)
+ * on 80x86 machines for efficiency */
+/* One block of a growable byte buffer.  Blocks form a singly linked list;
+   a write that does not fit in the last block goes into a new one. */
+typedef struct memorybuffer {
+ int length; /* number of bytes of `buffer` in use */
+ char buffer[BUF_BLOCK_LEN];
+ struct memorybuffer *next; /* following block, or NULL if this is the last */
+} memorybuffer;
+
+/* a header under construction is just a memory buffer chain */
+typedef memorybuffer rdf_headerbuf;
+
+/* segments used by RDOFF, understood by rdfloadseg */
+#define RDOFF_CODE 0
+#define RDOFF_DATA 1
+#define RDOFF_HEADER -1
+/* mask for 'segment' in relocation records to find if relative relocation */
+#define RDOFF_RELATIVEMASK 64
+/* mask to find actual segment value in relocation records */
+#define RDOFF_SEGMENTMASK 63
+
+/* RDOFF file manipulation functions */
+int rdfopen(rdffile *f,const char *name);
+int rdfclose(rdffile *f);
+int rdfloadseg(rdffile *f,int segment,void *buffer);
+rdfheaderrec *rdfgetheaderrec(rdffile *f); /* returns static storage */
+void rdfheaderrewind(rdffile *f); /* back to start of header */
+void rdfperror(const char *app,const char *name);
+
+/* functions to write a new RDOFF header to a file -
+ use rdfnewheader to allocate a header, rdfaddheader to add records to it,
+ rdfwriteheader to write 'RDOFF1', length of header, and the header itself
+ to a file, and then rdfdoneheader to dispose of the header */
+
+rdf_headerbuf *rdfnewheader(void);
+int rdfaddheader(rdf_headerbuf *h,rdfheaderrec *r);
+int rdfwriteheader(FILE *fp,rdf_headerbuf *h);
+void rdfdoneheader(rdf_headerbuf *h);
+
+#endif /* _RDOFF_H */
diff --git a/rdoff/rdx.c b/rdoff/rdx.c
new file mode 100644
index 0000000..28ffc42
--- /dev/null
+++ b/rdoff/rdx.c
@@ -0,0 +1,61 @@
+/* rdx.c RDOFF Object File loader program
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+/* note: most of the actual work of this program is done by the modules
+ "rdfload.c", which loads and relocates the object file, and by "rdoff.c",
+ which contains general purpose routines to manipulate RDOFF object
+ files. You can use these files in your own program to load RDOFF objects
+ and execute the code in them in a similar way to what is shown here. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rdfload.h"
+#include "rdoff.h"
+#include "symtab.h"
+
+typedef int (*main_fn) (int,char**); /* Main function prototype */
+
+/* Load the RDOFF module named by argv[1], relocate it, and call its
+ * '_main' symbol with the remaining command-line arguments.
+ *
+ * Returns whatever the module's _main returns.  Exits with status 255 if
+ * no module is named, it cannot be loaded or relocated, or it exports no
+ * '_main'. */
+int main(int argc, char **argv)
+{
+    rdfmodule * m;
+    main_fn code;
+    symtabEnt * s;
+
+    if (argc < 2)
+    {
+        puts("usage: rdf <rdoff-executable> [params]\n");
+        exit(255);
+    }
+
+    m = rdfload(argv[1]);
+
+    if (! m)
+    {
+        rdfperror("rdf",argv[1]);
+        exit(255);
+    }
+
+    /* in this instance, the default relocation values will work fine, but
+       they may need changing in other cases...  A failure here leaves the
+       image only partly fixed up, so it must not be executed. */
+    if (rdf_relocate(m))
+    {
+        fprintf(stderr,"rdx: could not relocate '%s'\n",argv[1]);
+        exit(255);
+    }
+
+    s = symtabFind(m->symtab, "_main");
+    if (! s)
+    {
+        fprintf(stderr,"rdx: could not find symbol '_main' in '%s'\n",argv[1]);
+        exit(255);
+    }
+
+    /* after relocation the symbol's offset is its absolute address */
+    code = (main_fn) s->offset;
+
+    argv++, argc--; /* remove 'rdx' from command line */
+
+    return code(argc,argv); /* execute */
+}
+
diff --git a/rdoff/symtab.c b/rdoff/symtab.c
new file mode 100644
index 0000000..c0ff3e5
--- /dev/null
+++ b/rdoff/symtab.c
@@ -0,0 +1,80 @@
+/* symtab.c Routines to maintain and manipulate a symbol table
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "symtab.h"
+
+/* TODO: Implement a hash table, not this stupid implementation which
+ is too slow to be of practical use */
+
+/* Private data types */
+
+typedef struct tagSymtab {
+ symtabEnt ent;
+ struct tagSymtab * next;
+} symtabList;
+
+typedef symtabList * _symtab;
+
+/* Create an empty symbol table and return its opaque handle.
+ * Prints a message and exits with status 3 if memory runs out. */
+void *symtabNew(void)
+{
+    _symtab *head = malloc(sizeof(_symtab));
+
+    if (!head) {
+        fprintf(stderr,"symtab: out of memory\n");
+        exit(3);
+    }
+
+    *head = NULL; /* the list starts out empty */
+    return head;
+}
+
+/* Destroy a symbol table created by symtabNew, releasing every list node
+ * and the table handle itself.  The handle must not be used afterwards.
+ * Entry names are NOT freed: the table only stores the pointer it was
+ * given and cannot tell whether the caller allocated it.  Passing NULL is
+ * harmless. */
+void symtabDone(void *symtab)
+{
+    symtabList *l, *next;
+
+    if (symtab == NULL)
+        return;
+
+    for (l = *(_symtab *)symtab; l; l = next) {
+        next = l->next; /* fetch the link before the node goes away */
+        free(l);
+    }
+    free(symtab);
+}
+
+/* Add a copy of *ent to the front of the table.  Only the structure is
+ * copied, so ent->name must stay valid for as long as the table is used.
+ * Prints a message and exits with status 3 if memory runs out. */
+void symtabInsert(void *symtab,symtabEnt *ent)
+{
+    _symtab *head = symtab;
+    symtabList *node = malloc(sizeof(symtabList));
+
+    if (!node) {
+        fprintf(stderr,"symtab: out of memory\n");
+        exit(3);
+    }
+
+    node->ent = *ent;
+    node->next = *head;
+    *head = node;
+}
+
+/* Look up a symbol by exact name.  Returns a pointer to the entry stored
+ * in the table (the most recently inserted one if several share the name),
+ * or NULL if there is none. */
+symtabEnt *symtabFind(void *symtab,char *name)
+{
+    symtabList *node;
+
+    for (node = *(_symtab *)symtab; node != NULL; node = node->next)
+        if (strcmp(node->ent.name,name) == 0)
+            return &node->ent;
+
+    return NULL;
+}
+
+/* Write one line per symbol to `of`: name, segment ("code" for segment 0,
+ * "data" for anything else), offset in hex and flags in decimal. */
+void symtabDump(void *symtab,FILE *of)
+{
+    symtabList *node;
+    const char *segname;
+
+    for (node = *(_symtab *)symtab; node; node = node->next) {
+        segname = node->ent.segment ? "data" : "code";
+        fprintf(of,"%32s %s:%08lx (%ld)\n",node->ent.name,
+                segname, node->ent.offset, node->ent.flags);
+    }
+}
+
diff --git a/rdoff/symtab.h b/rdoff/symtab.h
new file mode 100644
index 0000000..5780d44
--- /dev/null
+++ b/rdoff/symtab.h
@@ -0,0 +1,22 @@
+/* symtab.h Header file for symbol table manipulation routines
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+typedef struct {
+ char *name;
+ long segment;
+ long offset;
+ long flags;
+} symtabEnt;
+
+void *symtabNew(void);
+void symtabDone(void *symtab);
+void symtabInsert(void *symtab,symtabEnt *ent);
+symtabEnt *symtabFind(void *symtab,char *name);
+void symtabDump(void *symtab,FILE *of);
+
+
diff --git a/sync.c b/sync.c
new file mode 100644
index 0000000..77212d8
--- /dev/null
+++ b/sync.c
@@ -0,0 +1,84 @@
+/* sync.c the Netwide Disassembler synchronisation processing module
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <limits.h>
+
+#include "sync.h"
+
+#define SYNC_MAX 4096 /* max # of sync points */
+
+/* Pending synchronisation points, kept as a binary min-heap ordered by
+   `pos`.  add_sync and next_sync index the heap from 1 (root at synx[1],
+   children of i at 2i and 2i+1); nsynx is the number of entries held. */
+static struct Sync {
+ unsigned long pos;
+ unsigned long length;
+} synx[SYNC_MAX];
+static int nsynx;
+
+/* Discard all recorded sync points, leaving the heap empty. */
+void init_sync(void) {
+ nsynx = 0;
+}
+
+/* Record a synchronisation point at `pos` covering `length` bytes.
+ *
+ * The point is inserted into the min-heap held in synx[1..nsynx].  The heap
+ * is indexed from 1, so an array of SYNC_MAX elements can hold at most
+ * SYNC_MAX-1 entries; once that many are stored, further points are
+ * silently dropped rather than written past the end of the array. */
+void add_sync(unsigned long pos, unsigned long length) {
+    int i;
+
+    if (nsynx >= SYNC_MAX - 1)
+        return; /* can't do anything - overflow */
+
+    nsynx++;
+    synx[nsynx].pos = pos;
+    synx[nsynx].length = length;
+
+    /* sift the new entry up until its parent is no larger than it is */
+    for (i = nsynx; i > 1 && synx[i/2].pos > synx[i].pos; i /= 2) {
+        struct Sync t;
+        t = synx[i/2]; /* structure copy */
+        synx[i/2] = synx[i]; /* structure copy again */
+        synx[i] = t; /* another structure copy */
+    }
+}
+
+/* Find the next synchronisation point relevant to `position`.
+ *
+ * Sync points whose range [pos, pos+length) ends at or before `position`
+ * are removed from the heap first.  Returns the position of the earliest
+ * remaining point and, if `length` is not NULL, stores that point's length
+ * there.  With no points left, returns ULONG_MAX and stores 0. */
+unsigned long next_sync(unsigned long position, unsigned long *length) {
+    struct Sync tmp;
+    int parent, child;
+
+    for (;;) {
+        if (nsynx <= 0)
+            break;
+        if (synx[1].pos + synx[1].length > position)
+            break; /* root is still ahead of us */
+
+        /* remove the root: exchange it with the last entry and shrink */
+        tmp = synx[1];
+        synx[1] = synx[nsynx];
+        synx[nsynx] = tmp;
+        nsynx--;
+
+        /* sift the new root down; on equal children the right one wins */
+        for (parent = 1; (child = parent*2) <= nsynx; parent = child) {
+            if (child+1 <= nsynx && synx[child+1].pos <= synx[child].pos)
+                child++;
+            if (synx[child].pos >= synx[parent].pos)
+                break;
+            tmp = synx[child];
+            synx[child] = synx[parent];
+            synx[parent] = tmp;
+        }
+    }
+
+    if (nsynx <= 0) {
+        if (length)
+            *length = 0L;
+        return ULONG_MAX;
+    }
+
+    if (length)
+        *length = synx[1].length;
+    return synx[1].pos;
+}
diff --git a/sync.h b/sync.h
new file mode 100644
index 0000000..ecb9201
--- /dev/null
+++ b/sync.h
@@ -0,0 +1,16 @@
+/* sync.h header file for sync.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#ifndef NASM_SYNC_H
+#define NASM_SYNC_H
+
+/* forget all sync points */
+void init_sync(void);
+/* record a sync point at `position` spanning `length` bytes */
+void add_sync(unsigned long position, unsigned long length);
+/* return the earliest sync point not yet passed by `position` (ULONG_MAX if
+   none); its length is stored through `length` when that is not NULL */
+unsigned long next_sync(unsigned long position, unsigned long *length);
+
+#endif
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..5f0e5c6
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,2 @@
+# Remove object files and the binaries named below from the test directory.
+clean:
+ rm -f *.o *.obj *.com bintest inctest
diff --git a/test/aouttest.asm b/test/aouttest.asm
new file mode 100644
index 0000000..c52f112
--- /dev/null
+++ b/test/aouttest.asm
@@ -0,0 +1,83 @@
+; test source file for assembling to a.out
+; build with:
+; nasm -f aout aouttest.asm
+; gcc -o aouttest aouttest.c aouttest.o
+; (assuming your gcc is a.out)
+
+; This file should test the following:
+; [1] Define and export a global text-section symbol
+; [2] Define and export a global data-section symbol
+; [3] Define and export a global BSS-section symbol
+; [4] Define a non-global text-section symbol
+; [5] Define a non-global data-section symbol
+; [6] Define a non-global BSS-section symbol
+; [7] Define a COMMON symbol
+; [8] Define a NASM local label
+; [9] Reference a NASM local label
+; [10] Import an external symbol
+; [11] Make a PC-relative call to an external symbol
+; [12] Reference a text-section symbol in the text section
+; [13] Reference a data-section symbol in the text section
+; [14] Reference a BSS-section symbol in the text section
+; [15] Reference a text-section symbol in the data section
+; [16] Reference a data-section symbol in the data section
+; [17] Reference a BSS-section symbol in the data section
+
+[BITS 32]
+[GLOBAL _lrotate] ; [1]
+[GLOBAL _greet] ; [1]
+[GLOBAL _asmstr] ; [2]
+[GLOBAL _textptr] ; [2]
+[GLOBAL _selfptr] ; [2]
+[GLOBAL _integer] ; [3]
+[EXTERN _printf] ; [10]
+[COMMON _commvar 4] ; [7]
+
+[SECTION .text]
+
+; prototype: long lrotate(long x, int num);
+_lrotate: ; [1]
+ push ebp
+ mov ebp,esp
+ mov eax,[ebp+8] ; eax = x (first stack argument)
+ mov ecx,[ebp+12] ; ecx = num (second stack argument)
+.label rol eax,1 ; [4] [8] rotate left one bit per pass
+ loop .label ; [9] [12] decrement ecx, repeat until it is zero
+ mov esp,ebp
+ pop ebp
+ ret ; rotated value is left in eax
+
+; prototype: void greet(void);
+_greet mov eax,[_integer] ; [14]
+ inc eax
+ mov [localint],eax ; [14] localint = _integer + 1
+ push dword [_commvar] ; printf arguments are pushed last-first
+ mov eax,[localptr] ; [13] eax = address of localint
+ push dword [eax] ; value of localint, read via the pointer
+ push dword [_integer] ; [1] [14]
+ push dword _printfstr ; [13] address of the format string
+ call _printf ; [11]
+ add esp,16 ; discard the four dwords pushed above
+ ret
+
+[SECTION .data]
+
+; a string
+_asmstr db 'hello, world', 0 ; [2]
+
+; a string for Printf
+_printfstr db "integer==%d, localint==%d, commvar=%d"
+ db 10, 0 ; line feed, then the terminating NUL
+
+; some pointers
+localptr dd localint ; [5] [17]
+_textptr dd _greet ; [15]
+_selfptr dd _selfptr ; [16] holds its own address
+
+[SECTION .bss]
+
+; an integer
+_integer resd 1 ; [3]
+
+; a local integer
+localint resd 1 ; [6]
diff --git a/test/aouttest.c b/test/aouttest.c
new file mode 100644
index 0000000..9a8eba3
--- /dev/null
+++ b/test/aouttest.c
@@ -0,0 +1,35 @@
+/*
+ * test source file for assembling to a.out
+ * build with:
+ * nasm -f aout aouttest.asm
+ * gcc -o aouttest aouttest.c aouttest.o
+ * (assuming your gcc is a.out)
+ */
+
+#include <stdio.h>
+
+extern long lrotate(long, int); /* returns long, as the asm prototype says */
+extern void greet(void); /* prints integer, localint and commvar */
+extern char asmstr[];
+extern void *selfptr; /* set up in the asm to hold its own address */
+extern void *textptr; /* set up in the asm to hold the address of greet */
+extern int integer, commvar;
+
+int main(void) {
+ printf("Testing lrotate: should get 0x00400000, 0x00000001\n");
+ printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4));
+ printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14));
+
+ printf("This string should read `hello, world': `%s'\n", asmstr);
+
+ printf("The integers here should be 1234, 1235 and 4321:\n");
+ integer = 1234;
+ commvar = 4321;
+ greet();
+
+ printf("These pointers should be equal: %p and %p\n",
+ (void *) &greet, textptr); /* %p takes a void * argument */
+
+ printf("So should these: %p and %p\n", selfptr, (void *) &selfptr);
+ return 0;
+}
diff --git a/test/bintest.asm b/test/bintest.asm
new file mode 100644
index 0000000..0a3c4ae
--- /dev/null
+++ b/test/bintest.asm
@@ -0,0 +1,56 @@
+; test source file for assembling to binary files
+; build with:
+; nasm -f bin -o bintest.com bintest.asm
+
+; When run (as a DOS .COM file), this program should print
+; hello, world
+; on two successive lines, then exit cleanly.
+
+; This file should test the following:
+; [1] Define a text-section symbol
+; [2] Define a data-section symbol
+; [3] Define a BSS-section symbol
+; [4] Define a NASM local label
+; [5] Reference a NASM local label
+; [6] Reference a text-section symbol in the text section
+; [7] Reference a data-section symbol in the text section
+; [8] Reference a BSS-section symbol in the text section
+; [9] Reference a text-section symbol in the data section
+; [10] Reference a data-section symbol in the data section
+; [11] Reference a BSS-section symbol in the data section
+
+[BITS 16]
+[ORG 0x100]
+
+[SECTION .text]
+
+ jmp start ; [6]
+
+end mov ax,0x4c00 ; [1] DOS function 4Ch: exit, return code 0
+ int 0x21
+
+start mov byte [bss_sym],',' ; [1] [8]
+ mov bx,[bssptr] ; [7] bx = address of bss_sym
+ mov al,[bx] ; al = the ',' just stored, read via the pointer
+ mov bx,[dataptr] ; [7] bx = datasym+5
+ mov [bx],al ; overwrite the byte after "hello" with ','
+ mov cx,2 ; print the message twice
+.loop mov dx,datasym ; [1] [4] [7]
+ mov ah,9 ; DOS function 09h: print '$'-terminated string
+ push cx ; keep the loop counter across the DOS call
+ int 0x21
+ pop cx
+ loop .loop ; [5] [6]
+ mov bx,[textptr] ; [7] bx = address of `end'
+ jmp bx ; exit via the code pointer stored in .data
+
+[SECTION .data]
+
+datasym db 'hello  world', 13, 10, '$' ; [2] TWO spaces: offset 5 becomes ','
+bssptr dw bss_sym ; [2] [11]
+dataptr dw datasym+5 ; [2] [10]
+textptr dw end ; [2] [9]
+
+[SECTION .bss]
+
+bss_sym resb 1 ; [3]
diff --git a/test/cofftest.asm b/test/cofftest.asm
new file mode 100644
index 0000000..bb843a1
--- /dev/null
+++ b/test/cofftest.asm
@@ -0,0 +1,82 @@
+; test source file for assembling to COFF
+; build with (under DJGPP, for example):
+; nasm -f coff cofftest.asm
+; gcc -o cofftest cofftest.c cofftest.o
+
+; This file should test the following:
+; [1] Define and export a global text-section symbol
+; [2] Define and export a global data-section symbol
+; [3] Define and export a global BSS-section symbol
+; [4] Define a non-global text-section symbol
+; [5] Define a non-global data-section symbol
+; [6] Define a non-global BSS-section symbol
+; [7] Define a COMMON symbol
+; [8] Define a NASM local label
+; [9] Reference a NASM local label
+; [10] Import an external symbol
+; [11] Make a PC-relative call to an external symbol
+; [12] Reference a text-section symbol in the text section
+; [13] Reference a data-section symbol in the text section
+; [14] Reference a BSS-section symbol in the text section
+; [15] Reference a text-section symbol in the data section
+; [16] Reference a data-section symbol in the data section
+; [17] Reference a BSS-section symbol in the data section
+
+[BITS 32]
+[GLOBAL _lrotate] ; [1]
+[GLOBAL _greet] ; [1]
+[GLOBAL _asmstr] ; [2]
+[GLOBAL _textptr] ; [2]
+[GLOBAL _selfptr] ; [2]
+[GLOBAL _integer] ; [3]
+[EXTERN _printf] ; [10]
+[COMMON _commvar 4] ; [7]
+
+[SECTION .text]
+
+; prototype: long lrotate(long x, int num);
+_lrotate: ; [1]
+ push ebp
+ mov ebp,esp
+ mov eax,[ebp+8] ; eax = x (first stack argument)
+ mov ecx,[ebp+12] ; ecx = num (second stack argument)
+.label rol eax,1 ; [4] [8] rotate left one bit per pass
+ loop .label ; [9] [12] decrement ecx, repeat until it is zero
+ mov esp,ebp
+ pop ebp
+ ret ; rotated value is left in eax
+
+; prototype: void greet(void);
+_greet mov eax,[_integer] ; [14]
+ inc eax
+ mov [localint],eax ; [14] localint = _integer + 1
+ push dword [_commvar] ; printf arguments are pushed last-first
+ mov eax,[localptr] ; [13] eax = address of localint
+ push dword [eax] ; value of localint, read via the pointer
+ push dword [_integer] ; [1] [14]
+ push dword _printfstr ; [13] address of the format string
+ call _printf ; [11]
+ add esp,16 ; discard the four dwords pushed above
+ ret
+
+[SECTION .data]
+
+; a string
+_asmstr db 'hello, world', 0 ; [2]
+
+; a string for Printf
+_printfstr db "integer==%d, localint==%d, commvar=%d"
+ db 10, 0 ; line feed, then the terminating NUL
+
+; some pointers
+localptr dd localint ; [5] [17]
+_textptr dd _greet ; [15]
+_selfptr dd _selfptr ; [16] holds its own address
+
+[SECTION .bss]
+
+; an integer
+_integer resd 1 ; [3]
+
+; a local integer
+localint resd 1 ; [6]
diff --git a/test/cofftest.c b/test/cofftest.c
new file mode 100644
index 0000000..4dec0df
--- /dev/null
+++ b/test/cofftest.c
@@ -0,0 +1,34 @@
+/*
+ * test source file for assembling to COFF
+ * build with (under DJGPP, for example):
+ * nasm -f coff cofftest.asm
+ * gcc -o cofftest cofftest.c cofftest.o
+ */
+
+#include <stdio.h>
+
+extern long lrotate(long, int); /* returns long, as the asm prototype says */
+extern void greet(void); /* prints integer, localint and commvar */
+extern char asmstr[];
+extern void *selfptr; /* set up in the asm to hold its own address */
+extern void *textptr; /* set up in the asm to hold the address of greet */
+extern int integer, commvar;
+
+int main(void) {
+ printf("Testing lrotate: should get 0x00400000, 0x00000001\n");
+ printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4));
+ printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14));
+
+ printf("This string should read `hello, world': `%s'\n", asmstr);
+
+ printf("The integers here should be 1234, 1235 and 4321:\n");
+ integer = 1234;
+ commvar = 4321;
+ greet();
+
+ printf("These pointers should be equal: %p and %p\n",
+ (void *) &greet, textptr); /* %p takes a void * argument */
+
+ printf("So should these: %p and %p\n", selfptr, (void *) &selfptr);
+ return 0;
+}
diff --git a/test/elftest.asm b/test/elftest.asm
new file mode 100644
index 0000000..a6034a6
--- /dev/null
+++ b/test/elftest.asm
@@ -0,0 +1,83 @@
+; test source file for assembling to ELF
+; build with:
+; nasm -f elf elftest.asm
+; gcc -o elftest elftest.c elftest.o
+; (assuming your gcc is ELF)
+
+; This file should test the following:
+; [1] Define and export a global text-section symbol
+; [2] Define and export a global data-section symbol
+; [3] Define and export a global BSS-section symbol
+; [4] Define a non-global text-section symbol
+; [5] Define a non-global data-section symbol
+; [6] Define a non-global BSS-section symbol
+; [7] Define a COMMON symbol
+; [8] Define a NASM local label
+; [9] Reference a NASM local label
+; [10] Import an external symbol
+; [11] Make a PC-relative call to an external symbol
+; [12] Reference a text-section symbol in the text section
+; [13] Reference a data-section symbol in the text section
+; [14] Reference a BSS-section symbol in the text section
+; [15] Reference a text-section symbol in the data section
+; [16] Reference a data-section symbol in the data section
+; [17] Reference a BSS-section symbol in the data section
+
+[BITS 32]
+[GLOBAL lrotate] ; [1]
+[GLOBAL greet] ; [1]
+[GLOBAL asmstr] ; [2]
+[GLOBAL textptr] ; [2]
+[GLOBAL selfptr] ; [2]
+[GLOBAL integer] ; [3]
+[EXTERN printf] ; [10]
+[COMMON commvar 4] ; [7]
+
+[SECTION .text]
+
+; prototype: long lrotate(long x, int num);
+lrotate: ; [1]
+ push ebp
+ mov ebp,esp
+ mov eax,[ebp+8] ; eax = x (first stack argument)
+ mov ecx,[ebp+12] ; ecx = num (second stack argument)
+.label rol eax,1 ; [4] [8] rotate left one bit per pass
+ loop .label ; [9] [12] decrement ecx, repeat until it is zero
+ mov esp,ebp
+ pop ebp
+ ret ; rotated value is left in eax
+
+; prototype: void greet(void);
+greet mov eax,[integer] ; [14]
+ inc eax
+ mov [localint],eax ; [14] localint = integer + 1
+ push dword [commvar] ; printf arguments are pushed last-first
+ mov eax,[localptr] ; [13] eax = address of localint
+ push dword [eax] ; value of localint, read via the pointer
+ push dword [integer] ; [1] [14]
+ push dword printfstr ; [13] address of the format string
+ call printf ; [11]
+ add esp,16 ; discard the four dwords pushed above
+ ret
+
+[SECTION .data]
+
+; a string
+asmstr db 'hello, world', 0 ; [2]
+
+; a string for Printf
+printfstr db "integer==%d, localint==%d, commvar=%d"
+ db 10, 0 ; line feed, then the terminating NUL
+
+; some pointers
+localptr dd localint ; [5] [17]
+textptr dd greet ; [15]
+selfptr dd selfptr ; [16] holds its own address
+
+[SECTION .bss]
+
+; an integer
+integer resd 1 ; [3]
+
+; a local integer
+localint resd 1 ; [6]
diff --git a/test/elftest.c b/test/elftest.c
new file mode 100644
index 0000000..1965fcf
--- /dev/null
+++ b/test/elftest.c
@@ -0,0 +1,35 @@
+/*
+ * test source file for assembling to ELF
+ * build with:
+ * nasm -f elf elftest.asm
+ * gcc -o elftest elftest.c elftest.o
+ * (assuming your gcc is ELF)
+ */
+
+#include <stdio.h>
+
+extern long lrotate(long, int); /* returns long, as the asm prototype says */
+extern void greet(void); /* prints integer, localint and commvar */
+extern char asmstr[];
+extern void *selfptr; /* set up in the asm to hold its own address */
+extern void *textptr; /* set up in the asm to hold the address of greet */
+extern int integer, commvar;
+
+int main(void) {
+ printf("Testing lrotate: should get 0x00400000, 0x00000001\n");
+ printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4));
+ printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14));
+
+ printf("This string should read `hello, world': `%s'\n", asmstr);
+
+ printf("The integers here should be 1234, 1235 and 4321:\n");
+ integer = 1234;
+ commvar = 4321;
+ greet();
+
+ printf("These pointers should be equal: %p and %p\n",
+ (void *) &greet, textptr); /* %p takes a void * argument */
+
+ printf("So should these: %p and %p\n", selfptr, (void *) &selfptr);
+ return 0;
+}
diff --git a/test/inc1.asm b/test/inc1.asm
new file mode 100644
index 0000000..e9e5819
--- /dev/null
+++ b/test/inc1.asm
@@ -0,0 +1,4 @@
+; This file is part of the include test.
+; See inctest.asm for build instructions.
+
+message: db 'hello, world',13,10,'$'
diff --git a/test/inc2.asm b/test/inc2.asm
new file mode 100644
index 0000000..c3ba2f7
--- /dev/null
+++ b/test/inc2.asm
@@ -0,0 +1,8 @@
+; This file is part of the include test.
+; See inctest.asm for build instructions.
+
+_main: mov dx,message ; `message' is the string defined in inc1.asm
+ mov ah,9 ; DOS function 09h: print '$'-terminated string at dx
+ int 21h
+ mov ax,4c00h ; DOS function 4Ch: exit, return code 0
+ int 21h
diff --git a/test/inctest.asm b/test/inctest.asm
new file mode 100644
index 0000000..95ab40f
--- /dev/null
+++ b/test/inctest.asm
@@ -0,0 +1,15 @@
+; This file, plus inc1.asm and inc2.asm, test NASM's file inclusion
+; mechanism.
+;
+; This produces a DOS .COM file: to assemble, use
+; nasm -f bin inctest.asm -o inctest.com
+; and when run, it should print `hello, world'.
+
+[BITS 16]
+[ORG 0x100]
+
+ jmp _main ; jump over the string data included next
+
+[INC inc1.asm]
+
+[INCLUDE inc2.asm]
diff --git a/test/objlink.c b/test/objlink.c
new file mode 100644
index 0000000..2f92f05
--- /dev/null
+++ b/test/objlink.c
@@ -0,0 +1,30 @@
+/*
+ * test source file for assembling to Microsoft 16-bit .OBJ
+ * build with (16-bit Microsoft C):
+ * nasm -f obj objtest.asm
+ * cl /AL objtest.obj objlink.c
+ * other compilers should work too, provided they handle large
+ * model in the same way as MS C
+ */
+
+#include <stdio.h>
+
+char text[] = "hello, world\n";
+
+extern void function(char *); /* in objtest.asm: bumps bsssym/commvar, prints twice */
+extern int bsssym, commvar;
+extern void *selfptr, *selfptr2; /* both set up in objtest.asm */
+
+int main(void) {
+ printf("these should be identical: %p, %p\n",
+ selfptr, (void *) &selfptr); /* %p takes a void *, not a long */
+ printf("these should be equivalent but different: %p, %p\n",
+ selfptr2, (void *) &selfptr2);
+ printf("you should see \"hello, world\" twice:\n");
+ bsssym = 0xF00D;
+ commvar = 0xD00F;
+ function(text);
+ printf("this should be 0xF00E: 0x%X\n", (unsigned) bsssym);
+ printf("this should be 0xD00E: 0x%X\n", (unsigned) commvar);
+ return 0;
+}
diff --git a/test/objtest.asm b/test/objtest.asm
new file mode 100644
index 0000000..8530bae
--- /dev/null
+++ b/test/objtest.asm
@@ -0,0 +1,82 @@
+; test source file for assembling to Microsoft 16-bit .OBJ
+; build with (16-bit Microsoft C):
+; nasm -f obj objtest.asm
+; cl /AL objtest.obj objlink.c
+; other compilers should work too, provided they handle large
+; model in the same way as MS C
+
+; This file should test the following:
+; [1] Define and export a global symbol
+; [2] Define a non-global symbol
+; [3] Define a common symbol
+; [4] Define a NASM local label
+; [5] Reference a NASM local label
+; [6] Import an external symbol
+; [7] Make a PC-relative relocated reference
+; [8] Reference a symbol in the same section as itself
+; [9] Reference a symbol in a different segment from itself
+; [10] Define a segment group
+; [11] Take the offset of a symbol in a grouped segment w.r.t. its segment
+; [12] Reserve uninitialised data space in a segment
+; [13] Directly take the segment address of a segment
+; [14] Directly take the segment address of a group
+; [15] Use SEG on a non-external
+; [16] Use SEG on an external
+
+[bits 16]
+
+[global _bsssym] ; [1]
+[global _function] ; [1]
+[global _selfptr] ; [1]
+[global _selfptr2] ; [1]
+[common _commvar 2] ; [3]
+[extern _printf] ; [6]
+
+[group mygroup mybss mydata] ; [10]
+[group mygroup2 mycode mycode2] ; [10]
+
+[segment mycode private]
+
+_function push bp
+ mov bp,sp
+ push ds ; ds is repointed below and restored before the calls
+ mov ax,mygroup ; [14]
+ mov ds,ax
+ inc word [_bsssym] ; [9]
+ mov ax,seg _commvar ; ds = segment holding _commvar
+ mov ds,ax
+ dec word [_commvar]
+ pop ds
+ mov ax,[bp+6] ; dx:ax = the two words of the argument, which sit
+ mov dx,[bp+8] ; above saved bp and the far return address
+ push dx ; push the argument twice: one copy per printf call
+ push ax
+ push dx
+ push ax
+ call far [cs:.printf] ; [5] [8]
+ pop ax ; discard the first copy of the argument
+ pop ax
+ call trampoline ; [7]
+ pop ax ; discard the second copy
+ pop ax
+ mov sp,bp
+ pop bp
+ retf
+
+.printf dw _printf, seg _printf ; [2] [4] [16]
+
+[segment mycode2 private]
+
+trampoline: pop ax ; ax = near return offset pushed by `call trampoline'
+ push cs ; rebuild it as a far return address (cs, then offset)
+ push ax
+ jmp far _printf ; presumably _printf returns far, back to the caller
+
+[segment mybss private]
+
+_bsssym resw 64 ; [12]
+
+[segment mydata private]
+
+_selfptr dw _selfptr, seg _selfptr ; [8] [15]
+_selfptr2 dw _selfptr2 wrt mydata, mydata ; [11] [13]