summaryrefslogtreecommitdiff
path: root/crc_i386.S
diff options
context:
space:
mode:
Diffstat (limited to 'crc_i386.S')
-rw-r--r--crc_i386.S240
1 files changed, 240 insertions, 0 deletions
diff --git a/crc_i386.S b/crc_i386.S
new file mode 100644
index 0000000..d23967b
--- /dev/null
+++ b/crc_i386.S
@@ -0,0 +1,240 @@
+/*
+ Copyright (c) 1990-2005 Info-ZIP. All rights reserved.
+
+ See the accompanying file LICENSE, version 2004-May-22 or later
+ (the contents of which are also included in zip.h) for terms of use.
+ If, for some reason, both of these files are missing, the Info-ZIP license
+ also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
+*/
+/*
+ * crc_i386.S, optimized CRC calculation function for Zip and UnZip,
+ * created by Paul Kienitz and Christian Spieler.
+ *
+ * GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm
+ * => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
+ *
+ * SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide
+ * aligned reads for most of the data from buffer), can be
+ * disabled by defining the macro NO_32_BIT_LOADS
+ *
+ * SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized
+ * CPUs (like the Pentium Pro, Pentium II, and probably some
+ * Pentium clones). This optimization is controlled by the
+ * preprocessor switch "__686" and is disabled by default.
+ * (This default is based on the assumption that most users
+ * do not yet work on a Pentium Pro or Pentium II machine ...)
+ *
+ * COS 050116: Enabled the 686 build by default, because there are hardly any
+ * pre-686 CPUs in serious use nowadays. (See SPC 970402 above.)
+ *
+ * FLAT memory model assumed. Calling interface:
+ * - args are pushed onto the stack from right to left,
+ * - return value is given in the EAX register,
+ * - all other registers (with exception of EFLAGS) are preserved. (With
+ * GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
+ * them nevertheless adds only 4 single byte instructions.)
+ *
+ * This source generates the function
+ * ulg crc32(ulg crc, ZCONST uch *buf, extent len).
+ *
+ * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
+ * This results in shorter code at the expense of reduced performance.
+ */
+
+/* This file is NOT used in conjunction with zlib. */
+#ifndef USE_ZLIB
+
+/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
+ * external symbols with an underline character '_'.
+ */
+#if defined(NO_UNDERLINE) || defined(__ELF__)
+# define _crc32 crc32
+# define _get_crc_table get_crc_table
+#endif
+/* Use 16-byte alignment if your assembler supports it. Warning: gas
+ * uses a log(x) parameter (.align 4 means 16-byte alignment). On SVR4
+ * the parameter is a number of bytes.
+ */
+#ifndef ALIGNMENT
+# define ALIGNMENT .align 4,0x90
+#endif
+
+#if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386)
+
+/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
+ * Warning: it uses the AT&T syntax: mov source,dest
+ * This file is only optional. If you want to use the C version,
+ * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
+ */
+
+ .file "crc_i386.S"
+
+#if !defined(PRE_686) && !defined(__686)
+ /* Optimize for Pentium Pro and compatible CPUs by default. */
+# define __686
+#endif
+
+#if defined(NO_STD_STACKFRAME) && defined(USE_STD_STACKFRAME)
+# undef USE_STACKFRAME
+#else
+ /* The default is to use standard stack frame entry, because it
+ * results in smaller code!
+ */
+# ifndef USE_STD_STACKFRAME
+# define USE_STD_STACKFRAME
+# endif
+#endif
+
+#ifdef USE_STD_STACKFRAME
+# define _STD_ENTRY pushl %ebp ; movl %esp,%ebp
+# define arg1 8(%ebp)
+# define arg2 12(%ebp)
+# define arg3 16(%ebp)
+# define _STD_LEAVE popl %ebp
+#else /* !USE_STD_STACKFRAME */
+# define _STD_ENTRY
+# define arg1 24(%esp)
+# define arg2 28(%esp)
+# define arg3 32(%esp)
+# define _STD_LEAVE
+#endif /* ?USE_STD_STACKFRAME */
+
+/*
+ * These two (three) macros make up the loop body of the CRC32 cruncher.
+ * registers modified:
+ * eax : crc value "c"
+ * esi : pointer to next data byte (or lword) "buf++"
+ * registers read:
+ * edi : pointer to base of crc_table array
+ * scratch registers:
+ * ebx : index into crc_table array
+ * (requires upper three bytes = 0 when __686 is undefined)
+ */
+#ifndef __686 /* optimize for 386, 486, Pentium */
+#define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\
+ movb %al, %bl ;/* tmp = c & 0xFF */\
+ shrl $8, %eax ;/* c = (c >> 8) */\
+ xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */
+#else /* __686 : optimize for Pentium Pro and compatible CPUs */
+#define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\
+ movzbl %al, %ebx ;/* tmp = c & 0xFF */\
+ shrl $8, %eax ;/* c = (c >> 8) */\
+ xorl (%edi, %ebx, 4), %eax ;/* c ^=table[tmp] */
+#endif /* ?__686 */
+
+#define Do_CRC_byte /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\
+ xorb (%esi), %al ;/* c ^= *buf */\
+ incl %esi ;/* buf++ */\
+ Do_CRC
+
+#ifndef NO_32_BIT_LOADS
+#define Do_CRC_lword \
+ xorl (%esi), %eax ;/* c ^= *(ulg *)buf */\
+ addl $4, %esi ;/* ((ulg *)buf)++ */\
+ Do_CRC \
+ Do_CRC \
+ Do_CRC \
+ Do_CRC
+#endif /* !NO_32_BIT_LOADS */
+
+
+ .text
+
+ .globl _crc32
+
+_crc32: /* ulg crc32(ulg crc, uch *buf, extent len) */
+ _STD_ENTRY
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %edx
+ pushl %ecx
+
+ movl arg2, %esi /* 2nd arg: uch *buf */
+ subl %eax, %eax /* > if (!buf) */
+ testl %esi, %esi /* > return 0; */
+ jz .L_fine /* > else { */
+ call _get_crc_table
+ movl %eax, %edi
+ movl arg1, %eax /* 1st arg: ulg crc */
+#ifndef __686
+ subl %ebx, %ebx /* ebx=0; bl usable as dword */
+#endif
+ movl arg3, %ecx /* 3rd arg: extent len */
+ notl %eax /* > c = ~crc; */
+
+ testl %ecx, %ecx
+#ifndef NO_UNROLLED_LOOPS
+ jz .L_bail
+# ifndef NO_32_BIT_LOADS
+ /* Assert now have positive length */
+.L_align_loop:
+ testl $3, %esi /* Align buf on lword boundary */
+ jz .L_aligned_now
+ Do_CRC_byte
+ decl %ecx
+ jnz .L_align_loop
+.L_aligned_now:
+# endif /* !NO_32_BIT_LOADS */
+ movl %ecx, %edx /* save len in edx */
+ shrl $3, %ecx /* ecx = len / 8 */
+ jz .L_No_Eights
+/* align loop head at start of 486 internal cache line !! */
+ ALIGNMENT
+.L_Next_Eight:
+# ifndef NO_32_BIT_LOADS
+ /* Do_CRC_lword */
+ xorl (%esi), %eax ;/* c ^= *(ulg *)buf */
+ addl $4, %esi ;/* ((ulg *)buf)++ */
+ Do_CRC
+ Do_CRC
+ Do_CRC
+ Do_CRC
+ /* Do_CRC_lword */
+ xorl (%esi), %eax ;/* c ^= *(ulg *)buf */
+ addl $4, %esi ;/* ((ulg *)buf)++ */
+ Do_CRC
+ Do_CRC
+ Do_CRC
+ Do_CRC
+# else /* NO_32_BIT_LOADS */
+ Do_CRC_byte
+ Do_CRC_byte
+ Do_CRC_byte
+ Do_CRC_byte
+ Do_CRC_byte
+ Do_CRC_byte
+ Do_CRC_byte
+ Do_CRC_byte
+# endif /* ?NO_32_BIT_LOADS */
+ decl %ecx
+ jnz .L_Next_Eight
+
+.L_No_Eights:
+ movl %edx, %ecx
+ andl $7, %ecx /* ecx = len % 8 */
+#endif /* !NO_UNROLLED_LOOPS */
+ jz .L_bail /* > if (len) */
+/* align loop head at start of 486 internal cache line !! */
+ ALIGNMENT
+.L_loupe: /* > do { */
+ Do_CRC_byte /* c = CRC32(c, *buf++); */
+ decl %ecx /* > } while (--len); */
+ jnz .L_loupe
+
+.L_bail: /* > } */
+ notl %eax /* > return ~c; */
+.L_fine:
+ popl %ecx
+ popl %edx
+ popl %ebx
+ popl %esi
+ popl %edi
+ _STD_LEAVE
+ ret
+
+#else
+ error: this asm version is for 386 only
+#endif /* i386 || _i386 || _I386 || __i386 */
+
+#endif /* !USE_ZLIB */