diff options
Diffstat (limited to 'src/lzo1x_c.ch')
-rw-r--r-- | src/lzo1x_c.ch | 430 |
1 files changed, 242 insertions, 188 deletions
diff --git a/src/lzo1x_c.ch b/src/lzo1x_c.ch index 08d615e..805f71d 100644 --- a/src/lzo1x_c.ch +++ b/src/lzo1x_c.ch @@ -2,19 +2,7 @@ This file is part of the LZO real-time data compression library. - Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996-2014 Markus Franz Xaver Johannes Oberhumer All Rights Reserved. The LZO library is free software; you can redistribute it and/or @@ -39,6 +27,12 @@ +#if 1 && defined(DO_COMPRESS) && !defined(do_compress) + /* choose a unique name to better help PGO optimizations */ +# define do_compress LZO_PP_ECONCAT2(DO_COMPRESS,_core) +#endif + + /*********************************************************************** // compress a block of data. ************************************************************************/ @@ -46,12 +40,12 @@ static __lzo_noinline lzo_uint do_compress ( const lzo_bytep in , lzo_uint in_len, lzo_bytep out, lzo_uintp out_len, - lzo_voidp wrkmem ) + lzo_uint ti, lzo_voidp wrkmem) { - register const lzo_bytep ip; + const lzo_bytep ip; lzo_bytep op; const lzo_bytep const in_end = in + in_len; - const lzo_bytep const ip_end = in + in_len - M2_MAX_LEN - 5; + const lzo_bytep const ip_end = in + in_len - 20; const lzo_bytep ii; lzo_dict_p const dict = (lzo_dict_p) wrkmem; @@ -59,14 +53,17 @@ do_compress ( const lzo_bytep in , lzo_uint in_len, ip = in; ii = ip; - ip += 4; + ip += ti < 4 ? 4 - ti : 0; for (;;) { - register const lzo_bytep m_pos; - lzo_uint m_off; + const lzo_bytep m_pos; +#if !(LZO_DETERMINISTIC) + LZO_DEFINE_UNINITIALIZED_VAR(lzo_uint, m_off, 0); lzo_uint m_len; lzo_uint dindex; - +next: + if __lzo_unlikely(ip >= ip_end) + break; DINDEX1(dindex,ip); GINDEX(m_pos,m_off,dict,dindex,in); if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) @@ -83,210 +80,252 @@ do_compress ( const lzo_bytep in , lzo_uint in_len, goto try_match; goto literal; - try_match: -#if 1 && defined(LZO_UNALIGNED_OK_2) - if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip) +#if (LZO_OPT_UNALIGNED32) + if (UA_GET_NE32(m_pos) != UA_GET_NE32(ip)) #else - if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) + if (m_pos[0] != ip[0] || m_pos[1] != ip[1] || m_pos[2] != ip[2] || m_pos[3] != ip[3]) #endif { + /* a literal */ +literal: + UPDATE_I(dict,0,dindex,ip,in); + ip += 1 + ((ip - ii) >> 5); + continue; } - else - { - if __lzo_likely(m_pos[2] == ip[2]) - { -#if 0 - if (m_off <= M2_MAX_OFFSET) - goto match; - if (lit <= 3) - goto match; - if (lit == 3) /* better compression, but slower */ - { - assert(op - 2 > out); op[-2] |= LZO_BYTE(3); - *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; - goto code_match; - } - if (m_pos[3] == ip[3]) -#endif - goto match; - } - else - { - /* still need a better way for finding M1 matches */ -#if 0 - /* a M1 match */ -#if 0 - if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3) +/*match:*/ + UPDATE_I(dict,0,dindex,ip,in); #else - if (m_off <= M1_MAX_OFFSET && lit == 3) -#endif - { - register lzo_uint t; - - t = lit; - assert(op - 2 > out); op[-2] |= LZO_BYTE(t); - do *op++ = *ii++; while (--t > 0); - assert(ii == ip); - m_off -= 1; - *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2)); - *op++ = LZO_BYTE(m_off >> 2); - ip += 2; - goto match_done; - } -#endif - } - } - - - /* a literal */ + lzo_uint m_off; + lzo_uint m_len; + { + lzo_uint32_t dv; + lzo_uint dindex; literal: - UPDATE_I(dict,0,dindex,ip,in); - ++ip; + ip += 1 + ((ip - ii) >> 5); +next: if __lzo_unlikely(ip >= ip_end) break; - continue; - + dv = UA_GET_LE32(ip); + dindex = DINDEX(dv,ip); + GINDEX(m_off,m_pos,in+dict,dindex,in); + UPDATE_I(dict,0,dindex,ip,in); + if __lzo_unlikely(dv != UA_GET_LE32(m_pos)) + goto literal; + } +#endif /* a match */ -match: - UPDATE_I(dict,0,dindex,ip,in); - /* store current literal run */ - if (pd(ip,ii) > 0) - { - register lzo_uint t = pd(ip,ii); + ii -= ti; ti = 0; + { + lzo_uint t = pd(ip,ii); + if (t != 0) + { if (t <= 3) { - assert(op - 2 > out); - op[-2] |= LZO_BYTE(t); + op[-2] = LZO_BYTE(op[-2] | t); +#if (LZO_OPT_UNALIGNED32) + UA_COPY4(op, ii); + op += t; +#else + { do *op++ = *ii++; while (--t > 0); } +#endif } - else if (t <= 18) +#if (LZO_OPT_UNALIGNED32) || (LZO_OPT_UNALIGNED64) + else if (t <= 16) + { *op++ = LZO_BYTE(t - 3); + UA_COPY8(op, ii); + UA_COPY8(op+8, ii+8); + op += t; + } +#endif else { - register lzo_uint tt = t - 18; - - *op++ = 0; - while (tt > 255) + if (t <= 18) + *op++ = LZO_BYTE(t - 3); + else { - tt -= 255; + lzo_uint tt = t - 18; *op++ = 0; + while __lzo_unlikely(tt > 255) + { + tt -= 255; + UA_SET1(op, 0); + op++; + } + assert(tt > 0); + *op++ = LZO_BYTE(tt); } - assert(tt > 0); - *op++ = LZO_BYTE(tt); +#if (LZO_OPT_UNALIGNED32) || (LZO_OPT_UNALIGNED64) + do { + UA_COPY8(op, ii); + UA_COPY8(op+8, ii+8); + op += 16; ii += 16; t -= 16; + } while (t >= 16); if (t > 0) +#endif + { do *op++ = *ii++; while (--t > 0); } } - do *op++ = *ii++; while (--t > 0); } - - /* code the match */ - assert(ii == ip); - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ || - m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++ -#ifdef LZO1Y - || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++ - || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ + } + m_len = 4; + { +#if (LZO_OPT_UNALIGNED64) + lzo_uint64_t v; + v = UA_GET_NE64(ip + m_len) ^ UA_GET_NE64(m_pos + m_len); + if __lzo_unlikely(v == 0) { + do { + m_len += 8; + v = UA_GET_NE64(ip + m_len) ^ UA_GET_NE64(m_pos + m_len); + if __lzo_unlikely(ip + m_len >= ip_end) + goto m_len_done; + } while (v == 0); + } +#if (LZO_ABI_BIG_ENDIAN) && defined(lzo_bitops_ctlz64) + m_len += lzo_bitops_ctlz64(v) / CHAR_BIT; +#elif (LZO_ABI_BIG_ENDIAN) + if ((v >> (64 - CHAR_BIT)) == 0) do { + v <<= CHAR_BIT; + m_len += 1; + } while ((v >> (64 - CHAR_BIT)) == 0); +#elif (LZO_ABI_LITTLE_ENDIAN) && defined(lzo_bitops_cttz64) + m_len += lzo_bitops_cttz64(v) / CHAR_BIT; +#elif (LZO_ABI_LITTLE_ENDIAN) + if ((v & UCHAR_MAX) == 0) do { + v >>= CHAR_BIT; + m_len += 1; + } while ((v & UCHAR_MAX) == 0); +#else + if (ip[m_len] == m_pos[m_len]) do { + m_len += 1; + } while (ip[m_len] == m_pos[m_len]); +#endif +#elif (LZO_OPT_UNALIGNED32) + lzo_uint32_t v; + v = UA_GET_NE32(ip + m_len) ^ UA_GET_NE32(m_pos + m_len); + if __lzo_unlikely(v == 0) { + do { + m_len += 4; + v = UA_GET_NE32(ip + m_len) ^ UA_GET_NE32(m_pos + m_len); + if (v != 0) + break; + m_len += 4; + v = UA_GET_NE32(ip + m_len) ^ UA_GET_NE32(m_pos + m_len); + if __lzo_unlikely(ip + m_len >= ip_end) + goto m_len_done; + } while (v == 0); + } +#if (LZO_ABI_BIG_ENDIAN) && defined(lzo_bitops_ctlz32) + m_len += lzo_bitops_ctlz32(v) / CHAR_BIT; +#elif (LZO_ABI_BIG_ENDIAN) + if ((v >> (32 - CHAR_BIT)) == 0) do { + v <<= CHAR_BIT; + m_len += 1; + } while ((v >> (32 - CHAR_BIT)) == 0); +#elif (LZO_ABI_LITTLE_ENDIAN) && defined(lzo_bitops_cttz32) + m_len += lzo_bitops_cttz32(v) / CHAR_BIT; +#elif (LZO_ABI_LITTLE_ENDIAN) + if ((v & UCHAR_MAX) == 0) do { + v >>= CHAR_BIT; + m_len += 1; + } while ((v & UCHAR_MAX) == 0); +#else + if (ip[m_len] == m_pos[m_len]) do { + m_len += 1; + } while (ip[m_len] == m_pos[m_len]); +#endif +#else + if __lzo_unlikely(ip[m_len] == m_pos[m_len]) { + do { + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if __lzo_unlikely(ip + m_len >= ip_end) + goto m_len_done; + } while (ip[m_len] == m_pos[m_len]); + } #endif - ) + } +m_len_done: + m_off = pd(ip,m_pos); + ip += m_len; + ii = ip; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { - --ip; - m_len = pd(ip, ii); - assert(m_len >= 3); assert(m_len <= M2_MAX_LEN); - - if (m_off <= M2_MAX_OFFSET) - { - m_off -= 1; + m_off -= 1; #if defined(LZO1X) - *op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2)); - *op++ = LZO_BYTE(m_off >> 3); + *op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = LZO_BYTE(m_off >> 3); #elif defined(LZO1Y) - *op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2)); - *op++ = LZO_BYTE(m_off >> 2); + *op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); #endif - } - else if (m_off <= M3_MAX_OFFSET) - { - m_off -= 1; + } + else if (m_off <= M3_MAX_OFFSET) + { + m_off -= 1; + if (m_len <= M3_MAX_LEN) *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } else -#if defined(LZO1X) { - m_off -= 0x4000; - assert(m_off > 0); assert(m_off <= 0x7fff); - *op++ = LZO_BYTE(M4_MARKER | - ((m_off & 0x4000) >> 11) | (m_len - 2)); - goto m3_m4_offset; + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while __lzo_unlikely(m_len > 255) + { + m_len -= 255; + UA_SET1(op, 0); + op++; + } + *op++ = LZO_BYTE(m_len); } -#elif defined(LZO1Y) - goto m4_match; -#endif + *op++ = LZO_BYTE(m_off << 2); + *op++ = LZO_BYTE(m_off >> 6); } else { - { - const lzo_bytep end = in_end; - const lzo_bytep m = m_pos + M2_MAX_LEN + 1; - while (ip < end && *m == *ip) - m++, ip++; - m_len = pd(ip, ii); - } - assert(m_len > M2_MAX_LEN); - - if (m_off <= M3_MAX_OFFSET) - { - m_off -= 1; - if (m_len <= 33) - *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); - else - { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = LZO_BYTE(M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); else { -#if defined(LZO1Y) -m4_match: -#endif - m_off -= 0x4000; - assert(m_off > 0); assert(m_off <= 0x7fff); - if (m_len <= M4_MAX_LEN) - *op++ = LZO_BYTE(M4_MARKER | - ((m_off & 0x4000) >> 11) | (m_len - 2)); - else + m_len -= M4_MAX_LEN; + *op++ = LZO_BYTE(M4_MARKER | ((m_off >> 11) & 8)); + while __lzo_unlikely(m_len > 255) { - m_len -= M4_MAX_LEN; - *op++ = LZO_BYTE(M4_MARKER | ((m_off & 0x4000) >> 11)); -m3_m4_len: - while (m_len > 255) - { - m_len -= 255; - *op++ = 0; - } - assert(m_len > 0); - *op++ = LZO_BYTE(m_len); + m_len -= 255; + UA_SET1(op, 0); + op++; } + *op++ = LZO_BYTE(m_len); } - -m3_m4_offset: - *op++ = LZO_BYTE((m_off & 63) << 2); + *op++ = LZO_BYTE(m_off << 2); *op++ = LZO_BYTE(m_off >> 6); } - -#if 0 -match_done: -#endif - ii = ip; - if __lzo_unlikely(ip >= ip_end) - break; + goto next; } *out_len = pd(op, out); - return pd(in_end,ii); + return pd(in_end,ii-ti); } @@ -299,16 +338,30 @@ DO_COMPRESS ( const lzo_bytep in , lzo_uint in_len, lzo_bytep out, lzo_uintp out_len, lzo_voidp wrkmem ) { + const lzo_bytep ip = in; lzo_bytep op = out; - lzo_uint t; + lzo_uint l = in_len; + lzo_uint t = 0; - if __lzo_unlikely(in_len <= M2_MAX_LEN + 5) - t = in_len; - else + while (l > 20) { - t = do_compress(in,in_len,op,out_len,wrkmem); + lzo_uint ll = l; + lzo_uintptr_t ll_end; +#if 0 || (LZO_DETERMINISTIC) + ll = LZO_MIN(ll, 49152); +#endif + ll_end = (lzo_uintptr_t)ip + ll; + if ((ll_end + ((t + ll) >> 5)) <= ll_end || (const lzo_bytep)(ll_end + ((t + ll) >> 5)) <= ip + ll) + break; +#if (LZO_DETERMINISTIC) + lzo_memset(wrkmem, 0, ((lzo_uint)1 << D_BITS) * sizeof(lzo_dict_t)); +#endif + t = do_compress(ip,ll,op,out_len,t,wrkmem); + ip += ll; op += *out_len; + l -= ll; } + t += l; if (t > 0) { @@ -317,7 +370,7 @@ DO_COMPRESS ( const lzo_bytep in , lzo_uint in_len, if (op == out && t <= 238) *op++ = LZO_BYTE(17 + t); else if (t <= 3) - op[-2] |= LZO_BYTE(t); + op[-2] = LZO_BYTE(op[-2] | t); else if (t <= 18) *op++ = LZO_BYTE(t - 3); else @@ -328,12 +381,14 @@ DO_COMPRESS ( const lzo_bytep in , lzo_uint in_len, while (tt > 255) { tt -= 255; - *op++ = 0; + UA_SET1(op, 0); + op++; } assert(tt > 0); *op++ = LZO_BYTE(tt); } - do *op++ = *ii++; while (--t > 0); + UA_COPYN(op, ii, t); + op += t; } *op++ = M4_MARKER | 1; @@ -348,4 +403,3 @@ DO_COMPRESS ( const lzo_bytep in , lzo_uint in_len, /* vi:ts=4:et */ - |