summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ckennelly <ckennelly@google.com> 2016-06-27 05:01:31 -0700
committer: Alkis Evlogimenos <alkis@google.com> 2017-01-26 21:38:49 +0100
commit: 094c67de88f41eae494a3823f8aaf0f77b25b980 (patch)
tree: e3b9cbe5582adaaae553da00463112ff6470f860
parent: fce661fa8cfec792dd1ef7fee52320188349feae (diff)
download: snappy-094c67de88f41eae494a3823f8aaf0f77b25b980.tar.gz
          snappy-094c67de88f41eae494a3823f8aaf0f77b25b980.tar.bz2
          snappy-094c67de88f41eae494a3823f8aaf0f77b25b980.zip
Speed up the EmitLiteral fast path, +1.62% for ZFlat benchmarks.
This is inspired by the Go version in
//third_party/golang/snappy/encode_amd64.s (emitLiteralFastPath)

Benchmark                        Base:Reference   (1)
--------------------------------------------------
(BM_ZFlat_0 1/cputime_ns)        9.669e-06        +1.65%
(BM_ZFlat_1 1/cputime_ns)        7.643e-07        +2.53%
(BM_ZFlat_10 1/cputime_ns)       1.107e-05        -0.97%
(BM_ZFlat_11 1/cputime_ns)       3.002e-06        +0.71%
(BM_ZFlat_12 1/cputime_ns)       2.338e-05        +7.22%
(BM_ZFlat_13 1/cputime_ns)       6.386e-05        +9.18%
(BM_ZFlat_14 1/cputime_ns)       0.0002256        -0.05%
(BM_ZFlat_15 1/cputime_ns)       7.608e-07        -1.29%
(BM_ZFlat_16 1/cputime_ns)       0.003236         -1.28%
(BM_ZFlat_17 1/cputime_ns)       2.58e-06         +0.52%
(BM_ZFlat_18 1/cputime_ns)       0.01538          +0.00%
(BM_ZFlat_19 1/cputime_ns)       1.436e-05        +6.21%
(BM_ZFlat_2 1/cputime_ns)        0.0001044        +4.99%
(BM_ZFlat_20 1/cputime_ns)       0.0001608        -0.18%
(BM_ZFlat_3 1/cputime_ns)        0.003745         +0.38%
(BM_ZFlat_4 1/cputime_ns)        8.144e-05        +6.21%
(BM_ZFlat_5 1/cputime_ns)        2.328e-06        -1.60%
(BM_ZFlat_6 1/cputime_ns)        2.391e-06        +0.06%
(BM_ZFlat_7 1/cputime_ns)        2.68e-06         -0.61%
(BM_ZFlat_8 1/cputime_ns)        8.852e-07        +0.19%
(BM_ZFlat_9 1/cputime_ns)        6.441e-07        +1.06%
geometric mean                                    +1.62%
-rw-r--r-- snappy.cc 38
1 files changed, 21 insertions, 17 deletions
diff --git a/snappy.cc b/snappy.cc
index 3b9988a..089219c 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -157,26 +157,30 @@ static inline char* EmitLiteral(char* op,
const char* literal,
int len,
bool allow_fast_path) {
- int n = len - 1; // Zero-length literals are disallowed
- if (n < 60) {
+ // The vast majority of copies are below 16 bytes, for which a
+ // call to memcpy is overkill. This fast path can sometimes
+ // copy up to 15 bytes too much, but that is okay in the
+ // main loop, since we have a bit to go on for both sides:
+ //
+ // - The input will always have kInputMarginBytes = 15 extra
+ // available bytes, as long as we're in the main loop, and
+ // if not, allow_fast_path = false.
+ // - The output will always have 32 spare bytes (see
+ // MaxCompressedLength).
+ assert(len > 0); // Zero-length literals are disallowed
+ int n = len - 1;
+ if (allow_fast_path && len <= 16) {
// Fits in tag byte
*op++ = LITERAL | (n << 2);
- // The vast majority of copies are below 16 bytes, for which a
- // call to memcpy is overkill. This fast path can sometimes
- // copy up to 15 bytes too much, but that is okay in the
- // main loop, since we have a bit to go on for both sides:
- //
- // - The input will always have kInputMarginBytes = 15 extra
- // available bytes, as long as we're in the main loop, and
- // if not, allow_fast_path = false.
- // - The output will always have 32 spare bytes (see
- // MaxCompressedLength).
- if (allow_fast_path && len <= 16) {
- UnalignedCopy64(literal, op);
- UnalignedCopy64(literal + 8, op + 8);
- return op + len;
- }
+ UnalignedCopy64(literal, op);
+ UnalignedCopy64(literal + 8, op + 8);
+ return op + len;
+ }
+
+ if (n < 60) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
} else {
// Encode in upcoming bytes
char* base = op;