diff options
Diffstat (limited to 'test/tmap.nas')
-rw-r--r-- | test/tmap.nas | 1447 |
1 files changed, 1447 insertions, 0 deletions
diff --git a/test/tmap.nas b/test/tmap.nas new file mode 100644 index 0000000..51b477f --- /dev/null +++ b/test/tmap.nas @@ -0,0 +1,1447 @@ +;; NASM note: this file abuses the section flags in such a way that
+;; NASM 0.98.37 broke when this was compiled with:
+;; nasm -o tmap.o -f elf -DLINUX tmap.nas
+
+;;-----------------------------------------------------------------------------
+;;
+;; $Id$
+;;
+;; Copyright (C) 1998-2000 by DooM Legacy Team.
+;;
+;; This program is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License
+;; as published by the Free Software Foundation; either version 2
+;; of the License, or (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;;
+;; $Log$ +;; Revision 1.2 2003/09/10 23:33:38 hpa +;; Use the version of tmap.nas that actually caused problems +;;
+;; Revision 1.10 2001/02/24 13:35:21 bpereira
+;; no message
+;;
+;; Revision 1.9 2001/02/10 15:24:19 hurdler
+;; Apply Rob's patch for Linux version
+;;
+;; Revision 1.8 2000/11/12 09:48:15 bpereira
+;; no message
+;;
+;; Revision 1.7 2000/11/06 20:52:16 bpereira
+;; no message
+;;
+;; Revision 1.6 2000/11/03 11:48:40 hurdler
+;; Fix compiling problem under win32 with 3D-Floors and FragglScript (to verify!)
+;;
+;; Revision 1.5 2000/11/03 03:27:17 stroggonmeth
+;; Again with the bug fixing...
+;;
+;; Revision 1.4 2000/11/02 17:50:10 stroggonmeth
+;; Big 3Dfloors & FraggleScript commit!!
+;;
+;; Revision 1.3 2000/04/24 20:24:38 bpereira
+;; no message
+;;
+;; Revision 1.2 2000/02/27 00:42:11 hurdler
+;; fix CR+LF problem
+;;
+;; Revision 1.1.1.1 2000/02/22 20:32:32 hurdler
+;; Initial import into CVS (v1.29 pr3)
+;;
+;;
+;; DESCRIPTION:
+;; assembler optimised rendering code for software mode
+;; draw floor spans, and wall columns.
+;;
+;;-----------------------------------------------------------------------------
+
+
+[BITS 32]
+
+;; Symbol-declaration helpers and the name of the section that holds the code.
+;;
+;; cextern NAME : declare NAME as an external symbol.
+;; cglobal NAME : export NAME as a global symbol.
+;; CODE_SEG : section name used by the [SECTION CODE_SEG ...] directives below.
+;;
+;; With LINUX defined the names are used as written and CODE_SEG is .data
+;; (presumably because the drawers patch their own instructions, see
+;; ASM_PatchRowBytes, and therefore need a writable section -- TODO confirm).
+;; Without LINUX every name is first %define'd to its underscore-prefixed
+;; form, so the rest of the file keeps using the plain name, and CODE_SEG
+;; is .text.
+%ifdef LINUX
+%macro cextern 1
+[extern %1]
+%endmacro
+
+%macro cglobal 1
+[global %1]
+%endmacro
+
+%define CODE_SEG .data
+%else
+%macro cextern 1
+%define %1 _%1
+[extern %1]
+%endmacro
+
+%macro cglobal 1
+%define %1 _%1
+[global %1]
+%endmacro
+
+%define CODE_SEG .text
+%endif
+
+
+;; externs
+;; columns
+cextern dc_x
+cextern dc_yl
+cextern dc_yh
+cextern ylookup
+cextern columnofs
+cextern dc_source
+cextern dc_texturemid
+cextern dc_iscale
+cextern centery
+cextern dc_colormap
+cextern dc_transmap
+cextern colormaps
+
+;; spans
+cextern ds_x1
+cextern ds_x2
+cextern ds_y
+cextern ds_xfrac
+cextern ds_yfrac
+cextern ds_xstep
+cextern ds_ystep
+cextern ds_source
+cextern ds_colormap
+;cextern ds_textureheight
+
+; polygon edge rasterizer
+cextern prastertab
+
+
+;;----------------------------------------------------------------------
+;;
+;; R_DrawColumn
+;;
+;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
+;; TO DO: optimise it much farther... should take at most 3 cycles/pix
+;; once it's fixed, add code to patch the offsets so that it
+;; works in every screen width.
+;;
+;;----------------------------------------------------------------------
+
+;; Scratch variables shared by the drawers in this file.
+[SECTION .data]
+
+;;.align 4
+;; loopcount : pixel-pair count of R_DrawSpan_8_old
+loopcount dd 0
+;; pixelcount : pixel count of the column drawers and of R_DrawSpan_8_old;
+;; R_DrawSpan_8 reuses it to hold its rotated x step
+pixelcount dd 0
+;; tystep : fractional step kept in memory by the translucent/shade column
+;; drawers and by R_DrawSpan_8
+tystep dd 0
+
+;; back to the code section; 'write' is requested because the routines
+;; that follow are patched at run time by ASM_PatchRowBytes
+[SECTION CODE_SEG write]
+
+;----------------------------------------------------------------------------
+; fixed_t FixedMul (fixed_t a, fixed_t b)
+;
+; 16.16 fixed-point multiply: returns bits 16..47 of the signed 64-bit
+; product a*b.
+; In: [esp+4] = a, [esp+8] = b (stack arguments)
+; Out: eax = result
+; Clobbers: edx, flags
+;----------------------------------------------------------------------------
+cglobal FixedMul
+FixedMul:
+ mov eax,[esp+8] ; eax = b
+ imul dword [esp+4] ; edx:eax = b * a, signed 64-bit product
+ shrd eax,edx,16 ; eax = low dword of (edx:eax >> 16)
+ ret
+
+;----------------------------------------------------------------------------
+; fixed_t FixedDiv2 (fixed_t a, fixed_t b);
+;
+; 16.16 fixed-point divide: returns (a << 16) / b, the dividend being
+; widened to 64 bits before the signed division.
+; In: [esp+4] = a, [esp+8] = b (stack arguments)
+; Out: eax = quotient (edx holds the remainder)
+; Clobbers: edx, flags
+; Note: idiv raises a divide error when b is zero or the quotient does not
+; fit in 32 bits; nothing here guards against that.
+;----------------------------------------------------------------------------
+cglobal FixedDiv2
+FixedDiv2:
+ mov edx,[esp+4] ; edx = a
+ mov eax,edx ; eax = a
+ sar edx,16 ; edx = high dword of the 64-bit value (a << 16)
+ shl eax,16 ; eax = low dword of the 64-bit value (a << 16)
+ idiv dword [esp+8] ; eax = edx:eax / b
+ ret
+
+;----------------------------------------------------------------------------
+; void ASM_PatchRowBytes (int rowbytes);
+;
+; Self-modifying setup for the column drawers: overwrites the 32-bit
+; immediate of every instruction labelled p1..p9, pa..pq with rowbytes and
+; of every instruction labelled q1..q8 with 2*rowbytes. Each of those
+; labels marks an 'add/sub reg,imm32' whose immediate starts 2 bytes after
+; the label.
+; In: [esp+4] = rowbytes (stack argument)
+; Clobbers: eax, flags
+;----------------------------------------------------------------------------
+
+; PATCH_IMM32 label[,label...] : emit 'mov [label+2],eax' for every label given.
+%macro PATCH_IMM32 1-*
+%rep %0
+ mov [%1+2],eax
+%rotate 1
+%endrep
+%endmacro
+
+cglobal ASM_PatchRowBytes
+ASM_PatchRowBytes:
+ mov eax,[esp+4] ; eax = rowbytes
+ PATCH_IMM32 p1,p2,p3,p4,p5,p6,p7,p8,p9,pa,pb,pc,pd
+ PATCH_IMM32 pe,pf,pg,ph,pi,pj,pk,pl,pm,pn,po,pp,pq
+ add eax,eax ; eax = 2 * rowbytes
+ PATCH_IMM32 q1,q2,q3,q4,q5,q6,q7,q8
+ ret
+
+
+;----------------------------------------------------------------------------
+; 8bpp column drawer
+;
+; Draws the rows dc_yl..dc_yh of screen column dc_x. Each pixel is the
+; texel dc_source[row & 0x7f] looked up through dc_colormap; the texture
+; row is stepped by dc_iscale (16.16 fixed point). Takes no stack
+; arguments, all inputs are the extern dc_* / centery variables.
+;
+; The colormap lookup overwrites the low byte of the dc_colormap pointer
+; with the texel, so the table must start on a 256-byte boundary.
+; The 'add edi,0x12345678' instructions labelled p1..p7 advance to the next
+; screen row; their immediates are placeholders that ASM_PatchRowBytes
+; replaces with the real row size.
+; The quad counter lives in the 8-bit register ch, so only the low 8 bits
+; of pixelcount/4 are honoured.
+;
+; Saves and restores ebp, esi, edi, ebx; clobbers eax, ecx, edx, flags.
+;----------------------------------------------------------------------------
+
+cglobal R_DrawColumn_8
+; align 16
+R_DrawColumn_8:
+ push ebp ;; preserve caller's stack frame pointer
+ push esi ;; preserve register variables
+ push edi
+ push ebx
+;;
+;; dest = ylookup[dc_yl] + columnofs[dc_x];
+;;
+ mov ebp,[dc_yl]
+ mov ebx,ebp
+ mov edi,[ylookup+ebx*4]
+ mov ebx,[dc_x]
+ add edi,[columnofs+ebx*4] ;; edi = dest
+;;
+;; pixelcount = yh - yl + 1
+;;
+ mov eax,[dc_yh]
+ inc eax
+ sub eax,ebp ;; pixel count
+ mov [pixelcount],eax ;; save for final pixel
+ jle near vdone ;; nothing to scale (flags still come from the sub)
+;;
+;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
+;;
+ mov ecx,[dc_iscale] ;; fracstep
+ mov eax,[centery]
+ sub eax,ebp
+ imul eax,ecx
+ mov edx,[dc_texturemid]
+ sub edx,eax
+ mov ebx,edx
+ shr ebx,16 ;; frac int.
+ and ebx,0x7f
+ shl edx,16 ;; y frac up
+
+ mov ebp,ecx
+ shl ebp,16 ;; fracstep f. up
+ shr ecx,16 ;; fracstep i. ->cl
+ and cl,0x7f
+ mov esi,[dc_source]
+;;
+;; lets rock :) !
+;;
+ mov eax,[pixelcount]
+ mov dh,al ;; dh = low byte of the pixel count (for the &3 tests)
+ shr eax,2
+ mov ch,al ;; quad count
+ mov eax,[dc_colormap]
+ test dh,0x3
+ je near v4quadloop ;; count is a multiple of 4: quads only
+;;
+;; do un-even pixel
+;;
+ test dh,0x1
+ je two_uneven
+
+ mov al,[esi+ebx] ;; prep un-even loops
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+ and bl,0x7f ;; mask 0-127 texture index
+ mov [edi],dl ;; output pixel
+p1: add edi,0x12345678
+;;
+;; do two non-quad-aligned pixels
+;;
+two_uneven:
+ test dh,0x2
+ je f3
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+ and bl,0x7f ;; mask 0-127 texture index
+ mov [edi],dl ;; output pixel
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+ and bl,0x7f ;; mask 0-127 texture index
+p2: add edi,0x12345678 ;; next screen row
+ mov [edi],dl ;; output pixel
+p3: add edi,0x12345678 ;; next screen row
+;;
+;; test if there was at least 4 pixels
+;;
+f3:
+ test ch,0xff ;; test quad count
+ je near vdone
+;;
+;; ebp : ystep frac. upper 16 bits
+;; edx : y frac. upper 16 bits
+;; ebx : y i. lower 7 bits, masked for index
+;; ecx : ch = counter, cl = y step i.
+;; eax : colormap aligned 256
+;; esi : source texture column
+;; edi : dest screen
+;;
+v4quadloop:
+ mov dh,0x7f ;; prep mask (the loop below masks with the immediate 0x7f instead)
+align 4
+vquadloop:
+ mov al,[esi+ebx] ;; prep loop
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+ mov [edi],dl ;; output pixel
+ and bl,0x7f ;; mask 0-127 texture index
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp
+ adc bl,cl
+p4: add edi,0x12345678
+ mov dl,[eax]
+ and bl,0x7f
+ mov [edi],dl
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp
+ adc bl,cl
+p5: add edi,0x12345678
+ mov dl,[eax]
+ and bl,0x7f
+ mov [edi],dl
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp
+ adc bl,cl
+p6: add edi,0x12345678
+ mov dl,[eax]
+ and bl,0x7f
+ mov [edi],dl
+
+p7: add edi,0x12345678
+
+ dec ch
+ jne vquadloop
+
+vdone:
+ pop ebx ;; restore register variables
+ pop edi
+ pop esi
+ pop ebp ;; restore caller's stack frame pointer
+ ret
+
+;;----------------------------------------------------------------------
+;;13-02-98:
+;; R_DrawSkyColumn : same as R_DrawColumn but:
+;;
+;; - the texture row index wraps at 256 (plain 8-bit arithmetic in bl)
+;; instead of being masked to 0-127.
+;; this is needed because we have a higher texture for mouselook,
+;; we need at least 200 lines for the sky.
+;;
+;; NOTE: the sky should never wrap, so it could use a faster method.
+;; for the moment, we'll still use a wrapping method...
+;;
+;; IT'S JUST A QUICK CUT AND PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
+;;
+;; Inputs are the extern dc_* / centery variables, no stack arguments.
+;; The 'add edi,0x12345678' instructions labelled p8, p9, pa..pe step to
+;; the next screen row; ASM_PatchRowBytes replaces their immediates.
+;; Saves and restores ebp, esi, edi, ebx; clobbers eax, ecx, edx, flags.
+;;
+;;----------------------------------------------------------------------
+
+cglobal R_DrawSkyColumn_8
+; align 16
+R_DrawSkyColumn_8:
+ push ebp
+ push esi
+ push edi
+ push ebx
+;;
+;; dest = ylookup[dc_yl] + columnofs[dc_x];
+;;
+ mov ebp,[dc_yl]
+ mov ebx,ebp
+ mov edi,[ylookup+ebx*4]
+ mov ebx,[dc_x]
+ add edi,[columnofs+ebx*4] ;; edi = dest
+;;
+;; pixelcount = yh - yl + 1
+;;
+ mov eax,[dc_yh]
+ inc eax
+ sub eax,ebp ;; pixel count
+ mov [pixelcount],eax ;; save for final pixel
+ jle near vskydone ;; nothing to scale
+;;
+;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
+;;
+ mov ecx,[dc_iscale] ;; fracstep
+ mov eax,[centery]
+ sub eax,ebp
+ imul eax,ecx
+ mov edx,[dc_texturemid]
+ sub edx,eax
+ mov ebx,edx
+ shr ebx,16 ;; frac int.
+ and ebx,0xff ;; keep 8 bits: rows 0-255
+ shl edx,16 ;; y frac up
+ mov ebp,ecx
+ shl ebp,16 ;; fracstep f. up
+ shr ecx,16 ;; fracstep i. ->cl
+ mov esi,[dc_source]
+;;
+;; lets rock :) !
+;;
+ mov eax,[pixelcount]
+ mov dh,al ;; dh = low byte of the pixel count (for the &3 tests)
+ shr eax,0x2
+ mov ch,al ;; quad count
+ mov eax,[dc_colormap]
+ test dh,0x3
+ je vskyquadloop ;; count is a multiple of 4: quads only
+;;
+;; do un-even pixel
+;;
+ test dh,0x1
+ je f2
+ mov al,[esi+ebx] ;; prep un-even loops
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+ mov [edi],dl ;; output pixel
+p8: add edi,0x12345678
+;;
+;; do two non-quad-aligned pixels
+;;
+f2: test dh,0x2
+ je skyf3
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+ mov [edi],dl ;; output pixel
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp ;; ypos f += ystep f
+ adc bl,cl ;; ypos i += ystep i
+ mov dl,[eax] ;; colormap texel
+p9: add edi,0x12345678
+ mov [edi],dl ;; output pixel
+
+pa: add edi,0x12345678
+;;
+;; test if there was at least 4 pixels
+;;
+skyf3: test ch,0xff ;; test quad count
+ je vskydone
+;;
+;; ebp : ystep frac. in the upper 16 bits
+;; edx : y frac. in the upper 16 bits, dl = work byte
+;; ebx : y i. lower 8 bits (bl wraps at 256, no mask)
+;; ecx : ch = counter, cl = y step i.
+;; eax : colormap aligned 256
+;; esi : source texture column
+;; edi : dest screen
+;;
+align 4
+vskyquadloop:
+ mov al,[esi+ebx] ;; prep loop
+ add edx,ebp ;; ypos f += ystep f
+ mov dl,[eax] ;; colormap texel
+ adc bl,cl ;; ypos i += ystep i
+ mov [edi],dl ;; output pixel
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp
+ adc bl,cl
+pb: add edi,0x12345678
+ mov dl,[eax]
+ mov [edi],dl
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp
+ adc bl,cl
+pc: add edi,0x12345678
+ mov dl,[eax]
+ mov [edi],dl
+
+ mov al,[esi+ebx] ;; fetch source texel
+ add edx,ebp
+ adc bl,cl
+pd: add edi,0x12345678
+ mov dl,[eax]
+ mov [edi],dl
+
+pe: add edi,0x12345678
+
+ dec ch
+ jne vskyquadloop
+vskydone:
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+ ret
+
+
+;;----------------------------------------------------------------------
+;; R_DrawTranslucentColumn_8
+;;
+;; Vertical column texture drawer, with transparency. Replaces Doom2's
+;; 'fuzz' effect, which was not so beautiful.
+;; Transparency is always impressive in some way, don't know why...
+;;
+;; Draws the rows dc_yl..dc_yh of screen column dc_x. For every pixel:
+;; t = byte at dc_transmap with ah = texel, al = current *dest
+;; *dest = byte at dc_colormap with dl = t
+;; The index bytes overwrite the low 16 bits of the dc_transmap pointer and
+;; the low 8 bits of the dc_colormap pointer, so the code relies on those
+;; bits being zero (tables aligned accordingly).
+;; The texture row index is masked to 0-127.
+;; Inputs are the extern dc_* / centery variables, no stack arguments.
+;;
+;; The immediates of the instructions labelled pf..pk (rowbytes) and
+;; q1..q4 (2*rowbytes) are placeholders patched by ASM_PatchRowBytes.
+;; The quad counter lives in the 8-bit register ch.
+;;
+;; Saves and restores ebp, esi, edi, ebx; clobbers eax, ecx, edx, flags,
+;; [pixelcount] and [tystep].
+;;----------------------------------------------------------------------
+
+cglobal R_DrawTranslucentColumn_8
+R_DrawTranslucentColumn_8:
+ push ebp ;; preserve caller's stack frame pointer
+ push esi ;; preserve register variables
+ push edi
+ push ebx
+;;
+;; dest = ylookup[dc_yl] + columnofs[dc_x];
+;;
+ mov ebp,[dc_yl]
+ mov ebx,ebp
+ mov edi,[ylookup+ebx*4]
+ mov ebx,[dc_x]
+ add edi,[columnofs+ebx*4] ;; edi = dest
+;;
+;; pixelcount = yh - yl + 1
+;;
+ mov eax,[dc_yh]
+ inc eax
+ sub eax,ebp ;; pixel count
+ mov [pixelcount],eax ;; save for final pixel
+ jle near vtdone ;; nothing to scale
+;;
+;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
+;;
+ mov ecx,[dc_iscale] ;; fracstep
+ mov eax,[centery]
+ sub eax,ebp
+ imul eax,ecx
+ mov edx,[dc_texturemid]
+ sub edx,eax
+ mov ebx,edx
+
+ shr ebx,16 ;; frac int.
+ and ebx,0x7f
+ shl edx,16 ;; y frac up
+
+ mov ebp,ecx
+ shl ebp,16 ;; fracstep f. up
+ shr ecx,16 ;; fracstep i. ->cl
+ and cl,0x7f
+ push cx ;; move the y frac from edx into the upper
+ mov ecx,edx ;; half of ecx while keeping cl (y step i.)
+ pop cx
+ mov edx,[dc_colormap] ;; edx is now free to hold the colormap
+ mov esi,[dc_source]
+;;
+;; lets rock :) !
+;;
+ mov eax,[pixelcount]
+ shr eax,0x2
+ test byte [pixelcount],0x3
+ mov ch,al ;; quad count
+ mov eax,[dc_transmap]
+ je vt4quadloop ;; flags from the test above: quads only
+;;
+;; do un-even pixel
+;;
+ test byte [pixelcount],0x1
+ je trf2
+
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ add ecx,ebp
+ adc bl,cl
+ mov al,[edi] ;; fetch dest : index into colormap
+ and bl,0x7f
+ mov dl,[eax] ;; translucency lookup
+ mov dl,[edx] ;; colormap lookup
+ mov [edi],dl
+pf: add edi,0x12345678
+;;
+;; do two non-quad-aligned pixels
+;;
+trf2: test byte [pixelcount],0x2
+ je trf3
+
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ add ecx,ebp
+ adc bl,cl
+ mov al,[edi] ;; fetch dest : index into colormap
+ and bl,0x7f
+ mov dl,[eax]
+ mov dl,[edx]
+ mov [edi],dl
+pg: add edi,0x12345678
+
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ add ecx,ebp
+ adc bl,cl
+ mov al,[edi] ;; fetch dest : index into colormap
+ and bl,0x7f
+ mov dl,[eax]
+ mov dl,[edx]
+ mov [edi],dl
+ph: add edi,0x12345678
+;;
+;; test if there was at least 4 pixels
+;;
+trf3: test ch,0xff ;; test quad count
+ je near vtdone
+
+;;
+;; Register use in the quad loop:
+;; eax : dc_transmap, ah = texel, al = dest pixel
+;; ebx : bl = y i. lower 7 bits, masked for index
+;; ecx : y frac. in the upper 16 bits, ch = counter, cl = y step i.
+;; edx : dc_colormap, dl = work byte
+;; esi : source texture column
+;; edi, ebp : two dest pointers one row apart, each stepped by 2 rows
+;; [tystep] : ystep frac. in the upper 16 bits
+;;
+;; The loop is software pipelined: every stage stores the pixel whose
+;; texel/dest pair was fetched by the previous stage, then fetches the pair
+;; of the next row. It is entered at 'inloop' (its second stage), so one
+;; pixel is still pending when the counter reaches zero; it is flushed
+;; after the loop.
+;;
+vt4quadloop:
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov [tystep],ebp
+ mov al,[edi] ;; fetch dest of the first pixel (its own row)
+pi: add edi,0x12345678 ;; pi and pj cancel each other; both labels are
+pj: sub edi,0x12345678 ;; kept because ASM_PatchRowBytes patches them
+ mov ebp,edi ;; ebp = row 0
+pk: sub edi,0x12345678 ;; edi = row -1, becomes row 1 at q2
+ jmp short inloop
+align 4
+vtquadloop:
+ add ecx,[tystep]
+ adc bl,cl
+q1: add ebp,0x23456789
+ and bl,0x7f
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov dl,[edx]
+ mov [edi],dl
+ mov al,[ebp] ;; fetch dest : index into colormap
+inloop:
+ add ecx,[tystep]
+ adc bl,cl
+q2: add edi,0x23456789
+ and bl,0x7f
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov dl,[edx]
+ mov [ebp+0x0],dl
+ mov al,[edi] ;; fetch dest : index into colormap
+
+ add ecx,[tystep]
+ adc bl,cl
+q3: add ebp,0x23456789
+ and bl,0x7f
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov dl,[edx]
+ mov [edi],dl
+ mov al,[ebp] ;; fetch dest : index into colormap
+
+ add ecx,[tystep]
+ adc bl,cl
+q4: add edi,0x23456789
+ and bl,0x7f
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov dl,[edx]
+ mov [ebp],dl
+ mov al,[edi] ;; fetch dest : index into colormap
+
+ dec ch
+ jne vtquadloop
+;;
+;; flush the pending last pixel: ah/al already hold its texel and dest
+;; byte, edi points at its row
+;;
+ mov dl,[eax]
+ mov dl,[edx]
+ mov [edi],dl
+vtdone:
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+ ret
+
+
+;;----------------------------------------------------------------------
+;; R_DrawShadeColumn
+;;
+;; for smoke..etc.. test.
+;;
+;; Draws the rows dc_yl..dc_yh of screen column dc_x. For every pixel:
+;; *dest = byte at the pointer stored in colormaps, with ah = texel and
+;; al = current *dest
+;; The index bytes overwrite the low 16 bits of that pointer, so the code
+;; relies on those bits being zero (table aligned accordingly).
+;; The texture row index is masked to 0-127.
+;; Inputs are the extern dc_* / centery / colormaps variables, no stack
+;; arguments.
+;;
+;; The immediates of the instructions labelled pl..pq (rowbytes) and
+;; q5..q8 (2*rowbytes) are placeholders patched by ASM_PatchRowBytes.
+;; The quad counter lives in the 8-bit register ch.
+;;
+;; Saves and restores ebp, esi, edi, ebx; clobbers eax, ecx, edx, flags,
+;; [pixelcount] and [tystep].
+;;----------------------------------------------------------------------
+cglobal R_DrawShadeColumn_8
+R_DrawShadeColumn_8:
+ push ebp ;; preserve caller's stack frame pointer
+ push esi ;; preserve register variables
+ push edi
+ push ebx
+
+;;
+;; dest = ylookup[dc_yl] + columnofs[dc_x];
+;;
+ mov ebp,[dc_yl]
+ mov ebx,ebp
+ mov edi,[ylookup+ebx*4]
+ mov ebx,[dc_x]
+ add edi,[columnofs+ebx*4] ;; edi = dest
+;;
+;; pixelcount = yh - yl + 1
+;;
+ mov eax,[dc_yh]
+ inc eax
+ sub eax,ebp ;; pixel count
+ mov [pixelcount],eax ;; save for final pixel
+ jle near shdone ;; nothing to scale
+;;
+;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
+;;
+ mov ecx,[dc_iscale] ;; fracstep
+ mov eax,[centery]
+ sub eax,ebp
+ imul eax,ecx
+ mov edx,[dc_texturemid]
+ sub edx,eax
+ mov ebx,edx
+ shr ebx,16 ;; frac int.
+ and ebx,byte +0x7f
+ shl edx,16 ;; y frac up
+
+ mov ebp,ecx
+ shl ebp,16 ;; fracstep f. up
+ shr ecx,16 ;; fracstep i. ->cl
+ and cl,0x7f
+
+ mov esi,[dc_source]
+;;
+;; lets rock :) !
+;;
+ mov eax,[pixelcount]
+ mov dh,al ;; dh = low byte of the pixel count (for the &3 tests)
+ shr eax,2
+ mov ch,al ;; quad count
+ mov eax,[colormaps]
+ test dh,3
+ je sh4quadloop ;; count is a multiple of 4: quads only
+;;
+;; do un-even pixel
+;;
+ test dh,0x1
+ je shf2
+
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ add edx,ebp
+ adc bl,cl
+ mov al,[edi] ;; fetch dest : index into colormap
+ and bl,0x7f
+ mov dl,[eax]
+ mov [edi],dl
+pl: add edi,0x12345678
+;;
+;; do two non-quad-aligned pixels
+;;
+shf2:
+ test dh,0x2
+ je shf3
+
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ add edx,ebp
+ adc bl,cl
+ mov al,[edi] ;; fetch dest : index into colormap
+ and bl,0x7f
+ mov dl,[eax]
+ mov [edi],dl
+pm: add edi,0x12345678
+
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ add edx,ebp
+ adc bl,cl
+ mov al,[edi] ;; fetch dest : index into colormap
+ and bl,0x7f
+ mov dl,[eax]
+ mov [edi],dl
+pn: add edi,0x12345678
+;;
+;; test if there was at least 4 pixels
+;;
+shf3:
+ test ch,0xff ;; test quad count
+ je near shdone
+
+;;
+;; Register use in the quad loop:
+;; eax : pointer read from colormaps, ah = texel, al = dest pixel
+;; ebx : bl = y i. lower 7 bits, masked for index
+;; ecx : ch = counter, cl = y step i.
+;; edx : y frac. in the upper 16 bits, dh = 0x7f index mask, dl = work byte
+;; esi : source texture column
+;; edi, ebp : two dest pointers one row apart, each stepped by 2 rows
+;; [tystep] : ystep frac. in the upper 16 bits
+;;
+;; The loop is software pipelined: every stage stores the pixel whose
+;; texel/dest pair was fetched by the previous stage, then fetches the pair
+;; of the next row. It is entered at 'shinloop' (its second stage), so one
+;; pixel is still pending when the counter reaches zero; it is flushed
+;; after the loop.
+;;
+sh4quadloop:
+ mov dh,0x7f ;; prep mask
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov [tystep],ebp
+ mov al,[edi] ;; fetch dest of the first pixel (its own row)
+po: add edi,0x12345678 ;; po and pp cancel each other; both labels are
+pp: sub edi,0x12345678 ;; kept because ASM_PatchRowBytes patches them
+ mov ebp,edi ;; ebp = row 0
+pq: sub edi,0x12345678 ;; edi = row -1, becomes row 1 at q6
+ jmp short shinloop
+
+align 4
+shquadloop:
+ add edx,[tystep]
+ adc bl,cl
+ and bl,dh
+q5: add ebp,0x12345678
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov [edi],dl
+ mov al,[ebp] ;; fetch dest : index into colormap
+shinloop:
+ add edx,[tystep]
+ adc bl,cl
+ and bl,dh
+q6: add edi,0x12345678
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov [ebp],dl
+ mov al,[edi] ;; fetch dest : index into colormap
+
+ add edx,[tystep]
+ adc bl,cl
+ and bl,dh
+q7: add ebp,0x12345678
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov [edi],dl
+ mov al,[ebp] ;; fetch dest : index into colormap
+
+ add edx,[tystep]
+ adc bl,cl
+ and bl,dh
+q8: add edi,0x12345678
+ mov dl,[eax]
+ mov ah,[esi+ebx] ;; fetch texel : colormap number
+ mov [ebp],dl
+ mov al,[edi] ;; fetch dest : index into colormap
+
+ dec ch
+ jne shquadloop
+;;
+;; flush the pending last pixel: ah/al already hold its texel and dest
+;; byte, edi points at its row
+;;
+ mov dl,[eax]
+ mov [edi],dl
+
+shdone:
+ pop ebx ;; restore register variables
+ pop edi
+ pop esi
+ pop ebp ;; restore caller's stack frame pointer
+ ret
+
+
+
+;;----------------------------------------------------------------------
+;;
+;; R_DrawSpan
+;;
+;; Horizontal texture mapping
+;;
+;; Draws the pixels ds_x1..ds_x2 of screen row ds_y from the flat ds_source
+;; through ds_colormap, starting at (ds_xfrac, ds_yfrac) and stepping by
+;; (ds_xstep, ds_ystep). Inputs are the extern ds_* variables, no stack
+;; arguments.
+;;
+;; The loop body draws two pixels: the first half (.loop) advances only the
+;; y coordinate, the second half (.midloop) advances only the x coordinate.
+;; Both steps are loaded one bit further left than the matching position
+;; (ror 15 against 16 for y, ror 13 against 14 for x).
+;;
+;; NOTE(review): there is no guard for ds_x1 > ds_x2; the pair counter then
+;; starts positive and only stops after wrapping -- confirm callers never
+;; pass an empty span.
+;;
+;; Saves and restores ebp, esi, edi, ebx; clobbers eax, ecx, edx, flags,
+;; [pixelcount] and [tystep].
+;;
+;;----------------------------------------------------------------------
+
+
+[SECTION .data]
+
+;; not referenced by any code in this file
+oldcolormap dd 0
+
+[SECTION CODE_SEG write]
+
+cglobal R_DrawSpan_8
+R_DrawSpan_8:
+ push ebp ;; preserve caller's stack frame pointer
+ push esi ;; preserve register variables
+ push edi
+ push ebx
+;;
+;; initialise registers
+;;
+
+ mov edx, [ds_xfrac]
+ mov eax, [ds_ystep]
+ ror edx, 14 ;; dl = xfrac bits 14..21
+ ror eax, 15 ;; al = ystep bits 15..22
+ mov bl, dl
+ mov ecx, [ds_xstep]
+ mov dh, al
+ mov ax, 1 ;; low word of tystep = counter increment
+ mov [tystep], eax
+
+
+ mov eax, [ds_yfrac]
+ ror ecx, 13 ;; cl = xstep bits 13..20
+ ror eax, 16 ;; al = yfrac bits 16..23, high word = yfrac bits 0..15
+ mov dl, cl
+ mov bh, al
+ xor cx, cx
+ and ebx, 0x3fff ;; keep 6 bits in bh, 8 bits in bl
+ mov [pixelcount],ecx ;; rotated x step with the low word cleared
+
+ mov ecx, [ds_x2]
+ mov edi, [ds_y]
+ mov esi, [ds_x1]
+ mov edi, [ylookup+edi*4]
+ mov ebp, ebx
+ add edi, [columnofs+esi*4]
+ sub esi, ecx ;; esi = x1 - x2 (minus the pixel count, less one)
+ shr ebp, 2 ;; ebp = offset of the first texel in the flat
+ mov ecx, [ds_colormap]
+ mov ax, si
+ mov esi, [ds_source]
+ sar ax,1 ;; ax = pair counter, carry = parity
+ jnc near .midloop ;; check parity: odd pixel count starts half-way
+
+; summary
+; edx = upper bits: rotated-out low bits of ds_xfrac, dh = ystep bits 15..22,
+; dl = xstep bits 13..20
+; ebx = high16bit = 0, bh = yfrac bits 16..21, bl = xfrac bits 14..21
+; ecx = colormap table, cl receives each texel (colormap is aligned 8 bits)
+; eax = high16bit yfrac[0..15], ax = pair counter (counts up to 0)
+; esi = flat texture source
+; edi = screen buffer destination
+; ebp = ebx >> 2, offset of the texel to fetch
+; pixelcount = ds_xstep rotated right by 13, low 16 bits = 0
+; tystep = ds_ystep rotated right by 15, low 16 bits = 1 (increment of count)
+
+align 4
+.loop
+ add eax, [tystep] ;; y frac += ystep frac, counter += 1
+ mov cl, [esi+ebp]
+ adc bh, dh ;; y int += ystep int + carry
+ mov cl, [ecx]
+ and bh, 0x3f
+ mov [edi], cl
+ mov ebp, ebx
+ inc edi
+ shr ebp, 2
+
+.midloop:
+ add edx, [pixelcount] ;; x frac += xstep frac (dh/dl are not touched)
+ mov cl, [esi+ebp]
+ adc bl, dl ;; x int += xstep int + carry
+ mov cl, [ecx]
+ mov ebp, ebx
+ mov [edi], cl
+ inc edi
+ shr ebp, 2
+
+ test eax, 0xffff ;; pair counter reached 0 ?
+ jnz near .loop
+
+.hdone: pop ebx ;; restore register variables
+ pop edi
+ pop esi
+ pop ebp ;; restore caller's stack frame pointer
+ ret
+
+
+[SECTION .data]
+
+;; scratch variables of R_DrawSpan_8_old
+;; obelix : composite (x,y) step
+;; etaussi : copy of ds_source
+obelix dd 0
+etaussi dd 0
+
+;; NOTE(review): unlike the other code sections of this file, this one is
+;; reopened without the 'write' flag -- presumably part of the section-flag
+;; abuse mentioned in the NASM note at the top of the file.
+[SECTION CODE_SEG]
+
+;;
+;; R_DrawSpan_8_old
+;;
+;; Older span drawer: draws the pixels ds_x1..ds_x2 of screen row ds_y from
+;; the flat ds_source through ds_colormap, two pixels per loop iteration.
+;; The position is kept as one composite dword (x in the high word, y in the
+;; low word); a texel offset is built from the top 6 bits of each half,
+;; giving (y << 6) | x in the range 0..4095.
+;; Inputs are the extern ds_* variables, no stack arguments.
+;; Saves and restores ebp, esi, edi, ebx; clobbers eax, ecx, edx, flags,
+;; [pixelcount], [loopcount], [obelix] and [etaussi].
+;;
+cglobal R_DrawSpan_8_old
+R_DrawSpan_8_old:
+ push ebp ;; preserve caller's stack frame pointer
+ push esi ;; preserve register variables
+ push edi
+ push ebx
+;;
+;; find loop count
+;;
+ mov eax,[ds_x2]
+ inc eax
+ sub eax,[ds_x1] ;; pixel count
+ mov [pixelcount],eax ;; save for final pixel
+ js near .hdone ;; nothing to scale
+ shr eax,0x1 ;; double pixel count
+ mov [loopcount],eax
+;;
+;; build composite position
+;;
+ mov ebp,[ds_xfrac]
+ shl ebp,10
+ and ebp,0xffff0000
+ mov eax,[ds_yfrac]
+ shr eax,6
+ and eax,0xffff
+ mov edi,[ds_y]
+ or ebp,eax
+
+ mov esi,[ds_source]
+;;
+;; calculate screen dest
+;;
+ mov edi,[ylookup+edi*4]
+ mov eax,[ds_x1]
+ add edi,[columnofs+eax*4]
+;;
+;; build composite step
+;;
+ mov ebx,[ds_xstep]
+ shl ebx,10
+ and ebx,0xffff0000
+ mov eax,[ds_ystep]
+ shr eax,6
+ and eax,0xffff
+ or ebx,eax
+
+ mov [obelix],ebx
+ mov [etaussi],esi
+
+;; %eax aligned colormap, al = texel of the first pixel of a pair
+;; %ebx aligned colormap, bl = texel of the second pixel of a pair
+;; %ecx,%edx scratch
+;; %esi virtual source (pair counter inside the loop)
+;; %edi moving destination pointer
+;; %ebp frac
+
+ mov eax,[ds_colormap]
+ mov ecx,ebp
+ add ebp,ebx ;; advance frac pointer
+ shr cx,10
+ rol ecx,6
+ and ecx,4095 ;; finish calculation for first pixel
+ mov edx,ebp
+ shr dx,10
+ rol edx,6
+ add ebp,ebx ;; advance frac pointer
+ and edx,4095 ;; finish calculation for second pixel
+ mov ebx,eax
+ mov al,[esi+ecx] ;; get first pixel
+ mov bl,[esi+edx] ;; get second pixel
+
+ test dword [pixelcount],0xfffffffe ;; fewer than 2 pixels ? (tested by the je below)
+
+ mov dl,[eax] ;; color translate first pixel
+
+;; movw $0xf0f0,%dx ;;see visplanes start
+
+ je .hchecklast
+
+ mov dh,[ebx] ;; color translate second pixel
+ mov esi,[loopcount]
+align 4
+.hdoubleloop:
+ mov ecx,ebp
+ shr cx,10
+ rol ecx,6
+ add ebp,[obelix] ;; advance frac pointer
+ mov [edi],dx ;; write first and second pixel
+ and ecx,4095 ;; finish calculation for third pixel
+ mov edx,ebp
+ shr dx,10
+ rol edx,6
+ add ecx,[etaussi]
+ and edx,4095 ;; finish calculation for fourth pixel
+ mov al,[ecx] ;; get third pixel
+ add ebp,[obelix] ;; advance frac pointer
+ add edx,[etaussi]
+ mov bl,[edx] ;; get fourth pixel
+ mov dl,[eax] ;; color translate third pixel
+ add edi,byte +0x2 ;; advance to third pixel destination
+ dec esi ;; done with loop?
+ mov dh,[ebx] ;; color translate fourth pixel
+ jne .hdoubleloop
+;; check for final pixel
+.hchecklast:
+ test dword [pixelcount],0x1
+ je .hdone
+ mov [edi],dl ;; write final pixel
+.hdone: pop ebx ;; restore register variables
+ pop edi
+ pop esi
+ pop ebp ;; restore caller's stack frame pointer
+ ret
+
+
+;; ========================================================================
+;; Rasterizes the edge segments of a LINEARLY textured polygon.
+;; The texture-edge coordinates are interpolated together with the
+;; minx/maxx abscissas for every line.
+;; The 'dir' argument tells which texture edges are interpolated:
+;; 0 : segments tied to the TOP and BOTTOM edges ( TY constant )
+;; 1 : segments tied to the LEFT and RIGHT edges ( TX constant )
+;; ========================================================================
+;;
+;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
+;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
+;;
+;; For dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, since TY is constant.
+;;
+;; For dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, since TX is constant.
+;;
+;; A segment with y2 < y1 fills the minx/tx1/ty1 fields (left side of the
+;; polygon), one with y2 > y1 fills maxx/tx2/ty2 (right side); a horizontal
+;; segment (y1 == y2) writes nothing. All stored values are shifted left
+;; by 16 (PRE).
+;;
+;; Uses: the table pointed to by prastertab (declared with cextern at the
+;; top of the file), indexed by y, RASTERY_SIZEOF bytes per entry.
+;;
+;; NOTE(review): the two "TY varies" loops store the texture coordinate
+;; AFTER adding the step, while the two "TX varies" loops store it BEFORE
+;; adding the step -- confirm whether this one-step difference is intended.
+;;
+
+[SECTION CODE_SEG write]
+
+;; byte offsets of the fields of one raster table entry
+MINX EQU 0
+MAXX EQU 4
+TX1 EQU 8
+TY1 EQU 12
+TX2 EQU 16
+TY2 EQU 20
+RASTERY_SIZEOF EQU 24
+
+cglobal rasterize_segment_tex
+rasterize_segment_tex:
+ push ebp
+ mov ebp,esp
+
+ sub esp,byte +0x8 ;; allocate the local variables
+
+ push ebx
+ push esi
+ push edi
+ o16 mov ax,es ;; es is saved and restored around the body
+ push eax
+
+;; #define DX [ebp-4]
+;; #define TD [ebp-8]
+
+ mov eax,[ebp+0xc] ;; y1
+ mov ebx,[ebp+0x14] ;; y2
+ cmp ebx,eax
+ je near .L_finished ;; special (y1==y2) horizontal segment, exit!
+
+ jg near .L_rasterize_right
+
+;;rasterize_left: ;; rasterize a segment on the LEFT side of the polygon
+
+ mov ecx,eax
+ sub ecx,ebx
+ inc ecx ;; y1-y2+1
+
+ mov eax,RASTERY_SIZEOF
+ mul ebx ;; * y2
+ mov esi,[prastertab]
+ add esi,eax ;; point into rastertab[y2]
+
+ mov eax,[ebp+0x8] ;; ARG1
+ sub eax,[ebp+0x10] ;; ARG3
+ shl eax,0x10 ;; ((x1-x2)<<PRE) ...
+ cdq
+ idiv ecx ;; dx = ... / (y1-y2+1)
+ mov [ebp-0x4],eax ;; DX
+
+ mov eax,[ebp+0x18] ;; ARG5
+ sub eax,[ebp+0x1c] ;; ARG6
+ shl eax,0x10
+ cdq
+ idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
+ mov [ebp-0x8],eax ;; likewise tdy =((ty1-ty2)<<PRE) / (y1-y2+1)
+
+ mov eax,[ebp+0x10] ;; ARG3
+ shl eax,0x10 ;; x = x2<<PRE
+
+ mov ebx,[ebp+0x1c] ;; ARG6
+ shl ebx,0x10 ;; tx = tx2<<PRE d0
+ ;; ty = ty2<<PRE d1
+ mov edx,[ebp+0x20] ;; ARG7
+ shl edx,0x10 ;; ty = ty<<PRE d0
+ ;; tx = tx<<PRE d1
+ push ebp ;; ebp is reused as TD inside the loops
+ mov edi,[ebp-0x4] ;; DX
+ cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ?
+
+ mov ebp,[ebp-0x8] ;; TD (mov leaves the flags of the cmp intact)
+ je .L_rleft_h_loop
+;;
+;; TY varies, TX is constant
+;;
+.L_rleft_v_loop:
+ mov [esi+MINX],eax ;; rastertab[y].minx = x
+ add ebx,ebp
+ mov [esi+TX1],edx ;; .tx1 = tx
+ add eax,edi
+ mov [esi+TY1],ebx ;; .ty1 = ty
+
+ ;;addl DX, %eax // x += dx
+ ;;addl TD, %ebx // ty += tdy
+
+ add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
+ dec ecx
+ jne .L_rleft_v_loop
+ pop ebp
+ jmp .L_finished
+;;
+;; TX varies, TY is constant
+;;
+.L_rleft_h_loop:
+ mov [esi+MINX],eax ;; rastertab[y].minx = x
+ add eax,edi
+ mov [esi+TX1],ebx ;; .tx1 = tx
+ add ebx,ebp
+ mov [esi+TY1],edx ;; .ty1 = ty
+
+ ;;addl DX, %eax // x += dx
+ ;;addl TD, %ebx // tx += tdx
+
+ add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
+ dec ecx
+ jne .L_rleft_h_loop
+ pop ebp
+ jmp .L_finished
+;;
+;; rasterize a segment on the RIGHT side of the polygon
+;;
+.L_rasterize_right:
+ mov ecx,ebx
+ sub ecx,eax
+ inc ecx ;; y2-y1+1
+
+ mov ebx,RASTERY_SIZEOF
+ mul ebx ;; * y1
+ mov esi,[prastertab]
+ add esi,eax ;; point into rastertab[y1]
+
+ mov eax,[ebp+0x10] ;; ARG3
+ sub eax,[ebp+0x8] ;; ARG1
+ shl eax,0x10 ;; ((x2-x1)<<PRE) ...
+ cdq
+ idiv ecx ;; dx = ... / (y2-y1+1)
+ mov [ebp-0x4],eax ;; DX
+
+ mov eax,[ebp+0x1c] ;; ARG6
+ sub eax,[ebp+0x18] ;; ARG5
+ shl eax,0x10
+ cdq
+ idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
+ mov [ebp-0x8],eax ;; likewise tdy =((ty2-ty1)<<PRE) / (y2-y1+1)
+
+ mov eax,[ebp+0x8] ;; ARG1
+ shl eax,0x10 ;; x = x1<<PRE
+
+ mov ebx,[ebp+0x18] ;; ARG5
+ shl ebx,0x10 ;; tx = tx1<<PRE d0
+ ;; ty = ty1<<PRE d1
+ mov edx,[ebp+0x20] ;; ARG7
+ shl edx,0x10 ;; ty = ty<<PRE d0
+ ;; tx = tx<<PRE d1
+ push ebp ;; ebp is reused as TD inside the loops
+ mov edi,[ebp-0x4] ;; DX
+
+ cmp dword [ebp+0x24], 0 ;; direction ?
+
+ mov ebp,[ebp-0x8] ;; TD (mov leaves the flags of the cmp intact)
+ je .L_rright_h_loop
+;;
+;; TY varies, TX is constant
+;;
+.L_rright_v_loop:
+
+ mov [esi+MAXX],eax ;; rastertab[y].maxx = x
+ add ebx,ebp
+ mov [esi+TX2],edx ;; .tx2 = tx
+ add eax,edi
+ mov [esi+TY2],ebx ;; .ty2 = ty
+
+ ;;addl DX, %eax // x += dx
+ ;;addl TD, %ebx // ty += tdy
+
+ add esi,RASTERY_SIZEOF
+ dec ecx
+ jne .L_rright_v_loop
+
+ pop ebp
+
+ jmp short .L_finished
+;;
+;; TX varies, TY is constant
+;;
+.L_rright_h_loop:
+ mov [esi+MAXX],eax ;; rastertab[y].maxx = x
+ add eax,edi
+ mov [esi+TX2],ebx ;; .tx2 = tx
+ add ebx,ebp
+ mov [esi+TY2],edx ;; .ty2 = ty
+
+ ;;addl DX, %eax // x += dx
+ ;;addl TD, %ebx // tx += tdx
+
+ add esi,RASTERY_SIZEOF
+ dec ecx
+ jne .L_rright_h_loop
+
+ pop ebp
+
+.L_finished:
+ pop eax
+ o16 mov es,ax ;; restore es
+ pop edi
+ pop esi
+ pop ebx
+
+ mov esp,ebp ;; release the local variables
+ pop ebp
+ ret
+
+
+;;; this version can draw 64x64 tiles, but they would have to be arranged 4 per row,
+;; so that the stride from one line to the next is 256
+;;
+;; .data
+;;xstep dd 0
+;;ystep dd 0
+;;texwidth dd 64 ;; texture width
+;; .text
+;; this code is kept in case we add high-detail floor textures for example (256x256)
+; align 16
+;_R_DrawSpan_8:
+; push ebp ;; preserve caller's stack frame pointer
+; push esi ;; preserve register variables
+; push edi
+; push ebx
+;;
+;; find loop count
+;;
+; mov eax,[ds_x2]
+; inc eax
+; sub eax,[ds_x1] ;; pixel count
+; mov [pixelcount],eax ;; save for final pixel
+; js near .hdone ;; nothing to scale
+;;
+;; calculate screen dest
+;;
+; mov edi,[ds_y]
+; mov edi,[ylookup+edi*4]
+; mov eax,[ds_x1]
+; add edi,[columnofs+eax*4]
+;;
+;; prepare registers for inner loop
+;;
+; xor eax,eax
+; mov edx,[ds_xfrac]
+; ror edx,16
+; mov al,dl
+; mov ecx,[ds_yfrac]
+; ror ecx,16
+; mov ah,cl
+;
+; mov ebx,[ds_xstep]
+; ror ebx,16
+; mov ch,bl
+; and ebx,0xffff0000
+; mov [xstep],ebx
+; mov ebx,[ds_ystep]
+; ror ebx,16
+; mov dh,bl
+; and ebx,0xffff0000
+; mov [ystep],ebx
+;
+; mov esi,[ds_source]
+;
+;;; %eax Yi,Xi in %ah,%al
+;;; %ebx aligned colormap
+;;; %ecx Yfrac upper, dXi in %ch, %cl is counter (upto 1024pels, =4x256)
+;;; %edx Xfrac upper, dYi in %dh, %dl receives mapped pixels from (ebx)
+;;; ystep dYfrac, add to %ecx, low word is 0
+;;; xstep dXfrac, add to %edx, low word is 0
+;;; %ebp temporary register serves as offset like %eax
+;;; %esi virtual source
+;;; %edi moving destination pointer
+;
+; mov ebx,[pixelcount]
+; shr ebx,0x2 ;; 4 pixels per loop
+; test bl,0xff
+; je near .hchecklast
+; mov cl,bl
+;
+; mov ebx,[dc_colormap]
+;;;
+;;; prepare loop with first pixel
+;;;
+; add ecx,[ystep] ;;pre-a1
+; adc ah,dh
+; add edx,[xstep]
+; adc al,ch
+; and eax,0x3f3f
+; mov bl,[esi+eax] ;;pre-b1
+; mov dl,[ebx] ;;pre-c1
+;
+; add ecx,[ystep] ;;a2
+; adc ah,dh
+;
+;.hdoubleloop:
+; mov [edi+1],dl
+; add edx,[xstep]
+; adc al,ch
+; add edi,byte +0x2
+; mov ebp,eax
+; add ecx,[ystep]
+; adc ah,dh
+; and ebp,0x3f3f
+; add edx,[xstep]
+; mov bl,[esi+ebp]
+; adc al,ch
+; mov dl,[ebx]
+; and eax,0x3f3f
+; mov [edi],dl
+; mov bl,[esi+eax]
+; add ecx,[ystep]
+; adc ah,dh
+; add edx,[xstep]
+; adc al,ch
+; mov dl,[ebx]
+; mov ebp,eax
+; mov [edi+1],dl
+; and ebp,0x3f3f
+; add ecx,[ystep]
+; adc ah,dh
+; mov bl,[esi+ebp]
+; add edi,byte +0x2
+; add edx,[xstep]
+; adc al,ch
+; mov dl,[ebx]
+; and eax,0x3f3f
+; mov [edi],dl
+; mov bl,[esi+eax]
+; add ecx,[ystep]
+; adc ah,dh
+; mov dl,[ebx]
+; dec cl
+; jne near .hdoubleloop
+;;; check for final pixel
+;.hchecklast:
+;;; to do
+;.hdone:
+; pop ebx
+; pop edi
+; pop esi
+; pop ebp
+; ret
|