diff options
author | snappy.mirrorbot@gmail.com <snappy.mirrorbot@gmail.com@03e5f5b5-db94-4691-08a0-1a8bf15f6143> | 2011-06-02 17:59:40 +0000 |
---|---|---|
committer | snappy.mirrorbot@gmail.com <snappy.mirrorbot@gmail.com@03e5f5b5-db94-4691-08a0-1a8bf15f6143> | 2011-06-02 17:59:40 +0000 |
commit | c266bbf32103f8ed4a83e2272ed3d8828d5b8b34 (patch) | |
tree | 82a418618cd831e21badd8f2269c536c77209881 /snappy.cc | |
parent | d0ee043bc50c62c5b5ff3da044f0b5567257407d (diff) | |
download | snappy-c266bbf32103f8ed4a83e2272ed3d8828d5b8b34.tar.gz snappy-c266bbf32103f8ed4a83e2272ed3d8828d5b8b34.tar.bz2 snappy-c266bbf32103f8ed4a83e2272ed3d8828d5b8b34.zip |
Speed up decompression by caching ip_.
It is seemingly hard for the compiler to understand that ip_, the current input
pointer into the compressed data stream, cannot alias anything else, and
thus using it directly will incur memory traffic as it cannot be kept in a
register. The code already knew about this and cached it into a local
variable, but since Step() only decoded one tag, it had to move ip_ back into
place between every tag. This seems to have cost us a significant amount of
performance, so this change turns Step() into a function that decodes as much as it can
before it saves ip_ back and returns. (Note that Step() was already inlined,
so it is not the manual inlining that buys the performance here.)
The wins are about 3-6% on Core 2, 6-13% on Core i7 and 5-12% on Opteron
(for plain array-to-array decompression, in 64-bit opt mode).
There is a tiny difference in the behavior here; if an invalid literal is
encountered (i.e., the writer refuses the Append() operation), ip_ will now
point to the byte past the tag byte, instead of where the literal was
originally thought to end. However, we don't use ip_ for anything after
DecompressAllTags() has returned, so this should not change external behavior
in any way.
Microbenchmark results for Core i7, 64-bit (Opteron results are similar):
Benchmark Time(ns) CPU(ns) Iterations
---------------------------------------------------
BM_UFlat/0 79134 79110 8835 1.2GB/s html [ +6.2%]
BM_UFlat/1 786126 786096 891 851.8MB/s urls [+10.0%]
BM_UFlat/2 9948 9948 69125 11.9GB/s jpg [ -1.3%]
BM_UFlat/3 31999 31998 21898 2.7GB/s pdf [ +6.5%]
BM_UFlat/4 318909 318829 2204 1.2GB/s html4 [ +6.5%]
BM_UFlat/5 31384 31390 22363 747.5MB/s cp [ +9.2%]
BM_UFlat/6 14037 14034 49858 757.7MB/s c [+10.6%]
BM_UFlat/7 4612 4612 151395 769.5MB/s lsp [ +9.5%]
BM_UFlat/8 1203174 1203007 582 816.3MB/s xls [+19.3%]
BM_UFlat/9 253869 253955 2757 571.1MB/s txt1 [+11.4%]
BM_UFlat/10 219292 219290 3194 544.4MB/s txt2 [+12.1%]
BM_UFlat/11 672135 672131 1000 605.5MB/s txt3 [+11.2%]
BM_UFlat/12 902512 902492 776 509.2MB/s txt4 [+12.5%]
BM_UFlat/13 372110 371998 1881 1.3GB/s bin [ +5.8%]
BM_UFlat/14 50407 50407 10000 723.5MB/s sum [+13.5%]
BM_UFlat/15 5699 5701 100000 707.2MB/s man [+12.4%]
BM_UFlat/16 83448 83424 8383 1.3GB/s pb [ +5.7%]
BM_UFlat/17 256958 256963 2723 684.1MB/s gaviota [ +7.9%]
BM_UValidate/0 42795 42796 16351 2.2GB/s html [+25.8%]
BM_UValidate/1 490672 490622 1427 1.3GB/s urls [+22.7%]
BM_UValidate/2 237 237 2950297 499.0GB/s jpg [+24.9%]
BM_UValidate/3 14610 14611 47901 6.0GB/s pdf [+26.8%]
BM_UValidate/4 171973 171990 4071 2.2GB/s html4 [+25.7%]
git-svn-id: https://snappy.googlecode.com/svn/trunk@38 03e5f5b5-db94-4691-08a0-1a8bf15f6143
Diffstat (limited to 'snappy.cc')
-rw-r--r-- | snappy.cc | 81 |
1 files changed, 45 insertions, 36 deletions
@@ -653,45 +653,54 @@ class SnappyDecompressor { // Process the next item found in the input. // Returns true if successful, false on error or end of input. template <class Writer> - bool Step(Writer* writer) { + void DecompressAllTags(Writer* writer) { const char* ip = ip_; - if (ip_limit_ - ip < 5) { - if (!RefillTag()) return false; - ip = ip_; - } - - const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++)); - const uint32 entry = char_table[c]; - const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11]; - ip += entry >> 11; - const uint32 length = entry & 0xff; + for ( ;; ) { + if (ip_limit_ - ip < 5) { + ip_ = ip; + if (!RefillTag()) return; + ip = ip_; + } - if ((c & 0x3) == LITERAL) { - uint32 literal_length = length + trailer; - uint32 avail = ip_limit_ - ip; - while (avail < literal_length) { + const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++)); + const uint32 entry = char_table[c]; + const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11]; + ip += entry >> 11; + const uint32 length = entry & 0xff; + + if ((c & 0x3) == LITERAL) { + uint32 literal_length = length + trailer; + uint32 avail = ip_limit_ - ip; + while (avail < literal_length) { + bool allow_fast_path = (avail >= 16); + if (!writer->Append(ip, avail, allow_fast_path)) goto end; + literal_length -= avail; + reader_->Skip(peeked_); + size_t n; + ip = reader_->Peek(&n); + avail = n; + peeked_ = avail; + if (avail == 0) goto end; // Premature end of input + ip_limit_ = ip + avail; + } bool allow_fast_path = (avail >= 16); - if (!writer->Append(ip, avail, allow_fast_path)) return false; - literal_length -= avail; - reader_->Skip(peeked_); - size_t n; - ip = reader_->Peek(&n); - avail = n; - peeked_ = avail; - if (avail == 0) return false; // Premature end of input - ip_limit_ = ip + avail; + if (!writer->Append(ip, literal_length, allow_fast_path)) { + goto end; + } + ip += literal_length; + } else { + // copy_offset/256 is encoded 
in bits 8..10. By just fetching + // those bits, we get copy_offset (since the bit-field starts at + // bit 8). + const uint32 copy_offset = entry & 0x700; + if (!writer->AppendFromSelf(copy_offset + trailer, length)) { + goto end; + } } - ip_ = ip + literal_length; - bool allow_fast_path = (avail >= 16); - return writer->Append(ip, literal_length, allow_fast_path); - } else { - ip_ = ip; - // copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). - const uint32 copy_offset = entry & 0x700; - return writer->AppendFromSelf(copy_offset + trailer, length); } + +end: + ip_ = ip; } }; @@ -770,7 +779,7 @@ static bool InternalUncompress(Source* r, writer->SetExpectedLength(uncompressed_len); // Process the entire input - while (decompressor.Step(writer)) { } + decompressor.DecompressAllTags(writer); return (decompressor.eof() && writer->CheckLength()); } @@ -866,7 +875,7 @@ size_t Compress(Source* reader, Sink* writer) { // A type that writes to a flat array. // Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::Step(). +// Writer template argument to SnappyDecompressor::DecompressAllTags(). class SnappyArrayWriter { private: char* base_; |