summaryrefslogtreecommitdiff
path: root/win32/crc_i386.c
diff options
context:
space:
mode:
Diffstat (limited to 'win32/crc_i386.c')
-rw-r--r--win32/crc_i386.c152
1 files changed, 117 insertions, 35 deletions
diff --git a/win32/crc_i386.c b/win32/crc_i386.c
index e8a2faf..971ee66 100644
--- a/win32/crc_i386.c
+++ b/win32/crc_i386.c
@@ -1,13 +1,14 @@
/*
- Copyright (c) 1990-2005 Info-ZIP. All rights reserved.
+ Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
- See the accompanying file LICENSE, version 2004-May-22 or later
+ See the accompanying file LICENSE, version 2000-Apr-09 or later
(the contents of which are also included in zip.h) for terms of use.
- If, for some reason, both of these files are missing, the Info-ZIP license
+ If, for some reason, all these files are missing, the Info-ZIP license
also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
*/
/* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
* Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
+ * Last revised: 07-Jan-2007
*
* Original coded (in crc_i386.asm) and put into the public domain
* by Paul Kienitz and Christian Spieler.
@@ -56,6 +57,16 @@
* Also enabled the 686 build by default, because there are hardly any
* pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.)
*
+ * Revised 03-Jan-2006, Chr. Spieler
+ * Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
+ * data buffer in loop body (adjust pointer only once in loop body and use
+ * offsets to access each item); added additional support for the "unfolded
+ * tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
+ *
+ * Revised 07-Jan-2007, Chr. Spieler
+ * Recognize additional conditional flag CRC_TABLE_ONLY that prevents
+ * compilation of the crc32() function.
+ *
* FLAT memory model assumed.
*
* Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
@@ -64,8 +75,9 @@
*/
#include "../zip.h"
+#include "../crc32.h"
-#if defined(ASM_CRC) && !defined(USE_ZLIB)
+#if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
#if !defined(PRE_686) && !defined(__686)
# define __686
@@ -119,14 +131,64 @@
__asm { inc esi }; \
Do_CRC; }
+#define Do_CRC_byteof(ofs) { \
+ __asm { xor al, byte ptr [esi+(ofs)] }; \
+ Do_CRC; }
+
#ifndef NO_32_BIT_LOADS
+#ifdef IZ_CRCOPTIM_UNFOLDTBL
+# define SavLen len /* the edx register is needed elsewhere */
+# define UpdCRC_dword { \
+ __asm { movzx ebx,al }; \
+ __asm { mov edx,[edi+ebx*4+3072] }; \
+ __asm { movzx ebx,ah }; \
+ __asm { shr eax,16 }; \
+ __asm { xor edx,[edi+ebx*4+2048] }; \
+ __asm { movzx ebx,al }; \
+ __asm { shr eax,8 }; \
+ __asm { xor edx,[edi+ebx*4+1024] }; \
+ __asm { mov eax,[edi+eax*4] }; \
+ __asm { xor eax,edx }; }
+# define UpdCRC_dword_sh(dwPtrIncr) { \
+ __asm { movzx ebx,al }; \
+ __asm { mov edx,[edi+ebx*4+3072] }; \
+ __asm { movzx ebx,ah }; \
+ __asm { xor edx,[edi+ebx*4+2048] }; \
+ __asm { shr eax,16 }; \
+ __asm { movzx ebx,al }; \
+ __asm { add esi, 4*dwPtrIncr }; \
+ __asm { shr eax,8 }; \
+ __asm { xor edx,[edi+ebx*4+1024] }; \
+ __asm { mov eax,[edi+eax*4] }; \
+ __asm { xor eax,edx }; }
+#else /* !IZ_CRCOPTIM_UNFOLDTBL */
+# define SavLen edx /* the edx register is free for use here */
+# define UpdCRC_dword { \
+ Do_CRC; \
+ Do_CRC; \
+ Do_CRC; \
+ Do_CRC; }
+# define UpdCRC_dword_sh(dwPtrIncr) { \
+ Do_CRC; \
+ Do_CRC; \
+ __asm { add esi, 4*(dwPtrIncr) }; \
+ Do_CRC; \
+ Do_CRC; }
+#endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
+
#define Do_CRC_dword { \
__asm { xor eax, dword ptr [esi] }; \
- __asm { add esi, 4 }; \
- Do_CRC; \
- Do_CRC; \
- Do_CRC; \
- Do_CRC; }
+ UpdCRC_dword_sh(1); }
+
+#define Do_CRC_4dword { \
+ __asm { xor eax, dword ptr [esi] }; \
+ UpdCRC_dword; \
+ __asm { xor eax, dword ptr [esi+4] }; \
+ UpdCRC_dword; \
+ __asm { xor eax, dword ptr [esi+8] }; \
+ UpdCRC_dword; \
+ __asm { xor eax, dword ptr [esi+12] }; \
+ UpdCRC_dword_sh(4); }
#endif /* !NO_32_BIT_LOADS */
/* ========================================================================= */
@@ -170,46 +232,66 @@ align_loop:
jnz align_loop
aligned_now:
# endif /* !NO_32_BIT_LOADS */
- mov edx,ecx ;/* save len in edx */
- shr ecx,3 ;/* ecx = len / 8 */
- jz No_Eights
+ mov SavLen,ecx ;/* save current len for later */
+ shr ecx,4 ;/* ecx = len / 16 */
+ jz No_Sixteens
; align loop head at start of 486 internal cache line !!
align 16
-Next_Eight:
+Next_Sixteen:
}
# ifndef NO_32_BIT_LOADS
- Do_CRC_dword ;
- Do_CRC_dword ;
+ Do_CRC_4dword ;
# else /* NO_32_BIT_LOADS */
- Do_CRC_byte ;
- Do_CRC_byte ;
- Do_CRC_byte ;
- Do_CRC_byte ;
- Do_CRC_byte ;
- Do_CRC_byte ;
- Do_CRC_byte ;
- Do_CRC_byte ;
+ Do_CRC_byteof(0) ;
+ Do_CRC_byteof(1) ;
+ Do_CRC_byteof(2) ;
+ Do_CRC_byteof(3) ;
+ Do_CRC_byteof(4) ;
+ Do_CRC_byteof(5) ;
+ Do_CRC_byteof(6) ;
+ Do_CRC_byteof(7) ;
+ Do_CRC_byteof(8) ;
+ Do_CRC_byteof(9) ;
+ Do_CRC_byteof(10) ;
+ Do_CRC_byteof(11) ;
+ Do_CRC_byteof(12) ;
+ Do_CRC_byteof(13) ;
+ Do_CRC_byteof(14) ;
+ Do_CRC_byteof(15) ;
+ __asm { add esi,16 };
# endif /* ?NO_32_BIT_LOADS */
__asm {
dec ecx
- jnz Next_Eight
-No_Eights:
- mov ecx,edx
- and ecx,000000007H ;/* ecx = len % 8 */
-
+ jnz Next_Sixteen
+No_Sixteens:
+ mov ecx,SavLen
+ and ecx,00000000FH ;/* ecx = len % 16 */
+# ifndef NO_32_BIT_LOADS
+ shr ecx,2
+ jz No_Fours
+Next_Four:
+ }
+ Do_CRC_dword ;
+ __asm {
+ dec ecx
+ jnz Next_Four
+No_Fours:
+ mov ecx,SavLen
+ and ecx,000000003H ;/* ecx = len % 4 */
+# endif /* !NO_32_BIT_LOADS */
#endif /* !NO_UNROLLED_LOOPS */
- jz bail ;/*> if (len) */
+ jz bail ;/*> if (len) */
; align loop head at start of 486 internal cache line !!
align 16
-loupe: ;/*> do { */
+loupe: ;/*> do { */
}
- Do_CRC_byte ;/* c = CRC32(c, *buf++); */
+ Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/
__asm {
- dec ecx ;/*> } while (--len); */
+ dec ecx ;/*> } while (--len); */
jnz loupe
-bail: ;/*> } */
- not eax ;/*> return ~c; */
+bail: ;/*> } */
+ not eax ;/*> return ~c; */
fine:
pop ecx
pop edx
@@ -225,4 +307,4 @@ fine:
# pragma warning( default : 4035 )
#endif
#endif
-#endif /* ASM_CRC && !USE_ZLIB */
+#endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */