diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2023-02-27 11:44:29 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2023-02-27 11:44:29 +0900 |
commit | 78b5ebcb6e225835f8f00c9b591826c7c4131bc1 (patch) | |
tree | 2e3ce50d21a2c4cbcceaca18197afbb60010c644 | |
download | rust-ahash-upstream.tar.gz rust-ahash-upstream.tar.bz2 rust-ahash-upstream.zip |
Import ahash 0.8.3upstream/0.8.3upstream
-rw-r--r-- | .cargo_vcs_info.json | 6 | ||||
-rw-r--r-- | .github/workflows/rust.yml | 156 | ||||
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Cargo.toml | 157 | ||||
-rw-r--r-- | Cargo.toml.orig | 101 | ||||
-rw-r--r-- | FAQ.md | 118 | ||||
-rw-r--r-- | LICENSE-APACHE | 201 | ||||
-rw-r--r-- | LICENSE-MIT | 25 | ||||
-rw-r--r-- | README.md | 108 | ||||
-rw-r--r-- | build.rs | 23 | ||||
-rw-r--r-- | rustfmt.toml | 1 | ||||
-rw-r--r-- | src/aes_hash.rs | 433 | ||||
-rw-r--r-- | src/convert.rs | 163 | ||||
-rw-r--r-- | src/fallback_hash.rs | 368 | ||||
-rw-r--r-- | src/hash_map.rs | 501 | ||||
-rw-r--r-- | src/hash_quality_test.rs | 504 | ||||
-rw-r--r-- | src/hash_set.rs | 352 | ||||
-rw-r--r-- | src/lib.rs | 397 | ||||
-rw-r--r-- | src/operations.rs | 383 | ||||
-rw-r--r-- | src/random_state.rs | 529 | ||||
-rw-r--r-- | src/specialize.rs | 218 | ||||
-rw-r--r-- | tests/bench.rs | 198 | ||||
-rw-r--r-- | tests/map_tests.rs | 234 | ||||
-rw-r--r-- | tests/nopanic.rs | 81 |
24 files changed, 5260 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..5f20ebf --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "f9acd508bd89e7c5b2877a9510098100f9018d64" + }, + "path_in_vcs": "" +}
\ No newline at end of file diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..c551724 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,156 @@ +name: Rust + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install latest stable + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + components: clippy + - name: check nostd + uses: actions-rs/cargo@v1 + with: + command: check + args: --no-default-features + - name: test nostd + uses: actions-rs/cargo@v1 + with: + command: test + args: --no-default-features + - name: check constrandom + uses: actions-rs/cargo@v1 + with: + command: check + args: --no-default-features --features compile-time-rng + - name: test constrandom + uses: actions-rs/cargo@v1 + with: + command: test + args: --no-default-features --features compile-time-rng + - name: check fixed-seed + uses: actions-rs/cargo@v1 + with: + command: check + args: --no-default-features --features std + - name: check + uses: actions-rs/cargo@v1 + with: + command: check + - name: test + uses: actions-rs/cargo@v1 + with: + command: test + nightly: + name: nightly + runs-on: ubuntu-latest + env: + RUSTFLAGS: -C target-cpu=native + steps: + - uses: actions/checkout@v2 + - name: Install latest nightly + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + override: true + components: clippy + - name: check nightly + uses: actions-rs/cargo@v1 + with: + command: check + - name: test nightly + uses: actions-rs/cargo@v1 + with: + command: test + - name: check serde + uses: actions-rs/cargo@v1 + with: + command: check + args: --features serde + - name: test serde + uses: actions-rs/cargo@v1 + with: + command: test + args: --features serde + linux_arm7: + name: Linux ARMv7 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: armv7-unknown-linux-gnueabihf + - uses: actions-rs/cargo@v1 + with: + command: check + args: --target armv7-unknown-linux-gnueabihf + i686-unknown-linux-gnu: + name: Linux i686 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: i686-unknown-linux-gnu + - name: Install cross compile tools + run: sudo apt-get install -y gcc-multilib libc6-i386 libc6-dev-i386 + - uses: actions-rs/cargo@v1 + with: + command: check + args: --target i686-unknown-linux-gnu + - uses: actions-rs/cargo@v1 + with: + command: test + args: --target i686-unknown-linux-gnu + x86_64-unknown-linux-gnu: + name: Linux x86_64 - nightly + runs-on: ubuntu-latest + env: + RUSTFLAGS: -C target-cpu=skylake -C target-feature=+aes + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + override: true + target: x86_64-unknown-linux-gnu + - uses: actions-rs/cargo@v1 + with: + command: check + args: --target x86_64-unknown-linux-gnu + - uses: actions-rs/cargo@v1 + with: + command: test + args: --target x86_64-unknown-linux-gnu + thumbv6m: + name: thumbv6m + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: thumbv6m-none-eabi + - uses: actions-rs/cargo@v1 + with: + command: check + args: --target thumbv6m-none-eabi --no-default-features + wasm32-unknown-unknown: + name: wasm + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: wasm32-unknown-unknown + - uses: actions-rs/cargo@v1 + with: + command: check + args: --target wasm32-unknown-unknown --no-default-features diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fc89f1b --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea/ +Cargo.lock +target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a027c55 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,157 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "ahash" +version = "0.8.3" +authors = ["Tom Kaitchuck <Tom.Kaitchuck@gmail.com>"] +build = "./build.rs" +exclude = [ + "/smhasher", + "/benchmark_tools", +] +description = "A non-cryptographic hash function using AES-NI for high performance" +documentation = "https://docs.rs/ahash" +readme = "README.md" +keywords = [ + "hash", + "hasher", + "hashmap", + "aes", + "no-std", +] +categories = [ + "algorithms", + "data-structures", + "no-std", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/tkaitchuck/ahash" + +[package.metadata.docs.rs] +rustc-args = [ + "-C", + "target-feature=+aes", +] +rustdoc-args = [ + "-C", + "target-feature=+aes", +] +features = ["std"] + +[profile.bench] +opt-level = 3 +lto = "fat" +codegen-units = 1 +debug = false +debug-assertions = false + +[profile.release] +opt-level = 3 +lto = "fat" +codegen-units = 1 +debug = false +debug-assertions = false + +[profile.test] +opt-level = 2 +lto = "fat" + +[lib] +name = "ahash" +path = "src/lib.rs" +test = true +doctest = true +bench = true +doc = true + +[[bench]] +name = "ahash" +path = "tests/bench.rs" +harness = false + +[[bench]] +name = "map" +path = "tests/map_tests.rs" +harness = false + +[dependencies.atomic-polyfill] +version = "1.0.1" +optional = true + +[dependencies.cfg-if] +version = "1.0" + +[dependencies.const-random] +version = "0.1.12" +optional = true + +[dependencies.getrandom] +version = "0.2.7" +optional = true + +[dependencies.serde] +version = "1.0.117" +optional = true + +[dev-dependencies.criterion] +version = "0.3.2" +features = ["html_reports"] + +[dev-dependencies.fnv] +version = "1.0.5" + +[dev-dependencies.fxhash] +version = "0.2.1" + +[dev-dependencies.hashbrown] +version = "0.12.3" + +[dev-dependencies.hex] +version = "0.4.2" + +[dev-dependencies.no-panic] +version = "0.1.10" + +[dev-dependencies.rand] +version = "0.8.5" + +[dev-dependencies.seahash] +version = "4.0" + +[dev-dependencies.serde_json] +version = "1.0.59" + +[build-dependencies.version_check] +version = "0.9.4" + +[features] +atomic-polyfill = [ + "dep:atomic-polyfill", + "once_cell/atomic-polyfill", +] +compile-time-rng = ["const-random"] +default = [ + "std", + "runtime-rng", +] +no-rng = [] +runtime-rng = ["getrandom"] +std = [] + +[target."cfg(not(all(target_arch = \"arm\", target_os = \"none\")))".dependencies.once_cell] +version = "1.13.1" +features = [ + "unstable", + "alloc", +] +default-features = false diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..9b19105 --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,101 @@ +[package] +name = "ahash" +version = "0.8.3" +authors = ["Tom Kaitchuck <Tom.Kaitchuck@gmail.com>"] +license = "MIT OR Apache-2.0" +description = "A non-cryptographic hash function using AES-NI for high performance" +documentation = "https://docs.rs/ahash" +repository = "https://github.com/tkaitchuck/ahash" +keywords = ["hash", "hasher", "hashmap", "aes", "no-std"] +categories = ["algorithms", "data-structures", "no-std"] +edition = "2018" +readme = "README.md" +build = "./build.rs" +exclude = ["/smhasher", "/benchmark_tools"] + +[lib] +name = "ahash" +path = "src/lib.rs" +test = true +doctest = true +bench = true +doc = true + +[features] +default = ["std", "runtime-rng"] + +# Enabling this will enable `AHashMap` and `AHashSet`. +std = [] + +# Runtime random key generation using getrandom. +runtime-rng = ["getrandom"] + +# This is an alternative to runtime key generation which does compile time key generation if runtime-rng is not available. +# (If runtime-rng is enabled this does nothing.) +# If this is on (and runtime-rng is off) it implies the produced binary will not be identical. +# If this is disabled and runtime-rng is unavailable constant keys are used. +compile-time-rng = ["const-random"] + +# Do not use any random number generator (either at compile time or runtime) +# If either runtime-rng or compile-time-rng are enabled this does nothing. +no-rng = [] + +# in case this is being used on an architecture lacking core::sync::atomic::AtomicUsize and friends +atomic-polyfill = [ "dep:atomic-polyfill", "once_cell/atomic-polyfill"] + +[[bench]] +name = "ahash" +path = "tests/bench.rs" +harness = false + +[[bench]] +name = "map" +path = "tests/map_tests.rs" +harness = false + +[profile.test] +opt-level = 2 +lto = 'fat' + +[profile.release] +opt-level = 3 +debug = false +lto = 'fat' +debug-assertions = false +codegen-units = 1 + +[profile.bench] +opt-level = 3 +debug = false +lto = 'fat' +debug-assertions = false +codegen-units = 1 + +[build-dependencies] +version_check = "0.9.4" + +[dependencies] +const-random = { version = "0.1.12", optional = true } +serde = { version = "1.0.117", optional = true } +cfg-if = "1.0" +atomic-polyfill = { version="1.0.1", optional=true} +getrandom = { version = "0.2.7", optional = true } + +[target.'cfg(not(all(target_arch = "arm", target_os = "none")))'.dependencies] +once_cell = { version = "1.13.1", default-features = false, features = ["unstable", "alloc"] } + +[dev-dependencies] +no-panic = "0.1.10" +criterion = {version = "0.3.2", features = ["html_reports"] } +seahash = "4.0" +fnv = "1.0.5" +fxhash = "0.2.1" +hex = "0.4.2" +rand = "0.8.5" +serde_json = "1.0.59" +hashbrown = "0.12.3" + +[package.metadata.docs.rs] +rustc-args = ["-C", "target-feature=+aes"] +rustdoc-args = ["-C", "target-feature=+aes"] +features = ["std"] @@ -0,0 +1,118 @@ +## How does aHash prevent DOS attacks + +AHash is designed to [prevent an adversary that does not know the key from being able to create hash collisions or partial collisions.](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks) + +If you are a cryptographer and would like to help review aHash's algorithm, please post a comment [here](https://github.com/tkaitchuck/aHash/issues/11). + +In short, this is achieved by ensuring that: + +* aHash is designed to [resist differential crypto analysis](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks#differential-analysis). Meaning it should not be possible to devise a scheme to "cancel" out a modification of the internal state from a block of input via some corresponding change in a subsequent block of input. + * This is achieved by not performing any "premixing" - This reversible mixing gave previous hashes such as murmurhash confidence in their quality, but could be undone by a deliberate attack. + * Before it is used each chunk of input is "masked" such as by xoring it with an unpredictable value. +* aHash obeys the '[strict avalanche criterion](https://en.wikipedia.org/wiki/Avalanche_effect#Strict_avalanche_criterion)': +Each bit of input has the potential to flip every bit of the output. +* Similarly, each bit in the key can affect every bit in the output. +* Input bits never effect just one, or a very few, bits in intermediate state. This is specifically designed to prevent the sort of +[differential attacks launched by the sipHash authors](https://emboss.github.io/blog/2012/12/14/breaking-murmur-hash-flooding-dos-reloaded/) which cancel previous inputs. +* The `finish` call at the end of the hash is designed to not expose individual bits of the internal state. + * For example in the main algorithm 256bits of state and 256bits of keys are reduced to 64 total bits using 3 rounds of AES encryption. +Reversing this is more than non-trivial. Most of the information is by definition gone, and any given bit of the internal state is fully diffused across the output. +* In both aHash and its fallback the internal state is divided into two halves which are updated by two unrelated techniques using the same input. - This means that if there is a way to attack one of them it likely won't be able to attack both of them at the same time. +* It is deliberately difficult to 'chain' collisions. (This has been the major technique used to weaponize attacks on other hash functions) + +More details are available on [the wiki](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks). + +## Why not use a cryptographic hash in a hashmap. + +Cryptographic hashes are designed to make is nearly impossible to find two items that collide when the attacker has full control +over the input. This has several implications: + +* They are very difficult to construct, and have to go to a lot of effort to ensure that collisions are not possible. +* They have no notion of a 'key'. Rather, they are fully deterministic and provide exactly one hash for a given input. + +For a HashMap the requirements are different. + +* Speed is very important, especially for short inputs. Often the key for a HashMap is a single `u32` or similar, and to be effective +the bucket that it should be hashed to needs to be computed in just a few CPU cycles. +* A hashmap does not need to provide a hard and fast guarantee that no two inputs will ever collide. Hence, hashCodes are not 256bits +but are just 64 or 32 bits in length. Often the first thing done with the hashcode is to truncate it further to compute which among a few buckets should be used for a key. + * Here collisions are expected, and a cheap to deal with provided there is no systematic way to generated huge numbers of values that all +go to the same bucket. + * This also means that unlike a cryptographic hash partial collisions matter. It doesn't do a hashmap any good to produce a unique 256bit hash if +the lower 12 bits are all the same. This means that even a provably irreversible hash would not offer protection from a DOS attack in a hashmap +because an attacker can easily just brute force the bottom N bits. + +From a cryptography point of view, a hashmap needs something closer to a block cypher. +Where the input can be quickly mixed in a way that cannot be reversed without knowing a key. + +## Why isn't aHash cryptographically secure + +It is not designed to be. +Attempting to use aHash as a secure hash will likely fail to hold up for several reasons: + +1. aHash relies on random keys which are assumed to not be observable by an attacker. For a cryptographic hash all inputs can be seen and controlled by the attacker. +2. aHash has not yet gone through peer review, which is a pre-requisite for security critical algorithms. +3. Because aHash uses reduced rounds of AES as opposed to the standard of 10. Things like the SQUARE attack apply to part of the internal state. +(These are mitigated by other means to prevent producing collections, but would be a problem in other contexts). +4. Like any cypher based hash, it will show certain statistical deviations from truly random output when comparing a (VERY) large number of hashes. +(By definition cyphers have fewer collisions than truly random data.) + +There are efforts to build a secure hash function that uses AES-NI for acceleration, but aHash is not one of them. + +## How is aHash so fast + +AHash uses a number of tricks. + +One trick is taking advantage of specialization. If aHash is compiled on nightly it will take +advantage of specialized hash implementations for strings, slices, and primitives. + +Another is taking advantage of hardware instructions. +When it is available aHash uses AES rounds using the AES-NI instruction. AES-NI is very fast (on an intel i7-6700 it +is as fast as a 64 bit multiplication.) and handles 16 bytes of input at a time, while being a very strong permutation. + +This is obviously much faster than most standard approaches to hashing, and does a better job of scrambling data than most non-secure hashes. + +On an intel i7-6700 compiled on nightly Rust with flags `-C opt-level=3 -C target-cpu=native -C codegen-units=1`: + +| Input | SipHash 1-3 time | FnvHash time|FxHash time| aHash time| aHash Fallback* | +|----------------|-----------|-----------|-----------|-----------|---------------| +| u8 | 9.3271 ns | 0.808 ns | **0.594 ns** | 0.7704 ns | 0.7664 ns | +| u16 | 9.5139 ns | 0.803 ns | **0.594 ns** | 0.7653 ns | 0.7704 ns | +| u32 | 9.1196 ns | 1.4424 ns | **0.594 ns** | 0.7637 ns | 0.7712 ns | +| u64 | 10.854 ns | 3.0484 ns | **0.628 ns** | 0.7788 ns | 0.7888 ns | +| u128 | 12.465 ns | 7.0728 ns | 0.799 ns | **0.6174 ns** | 0.6250 ns | +| 1 byte string | 11.745 ns | 2.4743 ns | 2.4000 ns | **1.4921 ns** | 1.5861 ns | +| 3 byte string | 12.066 ns | 3.5221 ns | 2.9253 ns | **1.4745 ns** | 1.8518 ns | +| 4 byte string | 11.634 ns | 4.0770 ns | 1.8818 ns | **1.5206 ns** | 1.8924 ns | +| 7 byte string | 14.762 ns | 5.9780 ns | 3.2282 ns | **1.5207 ns** | 1.8933 ns | +| 8 byte string | 13.442 ns | 4.0535 ns | 2.9422 ns | **1.6262 ns** | 1.8929 ns | +| 15 byte string | 16.880 ns | 8.3434 ns | 4.6070 ns | **1.6265 ns** | 1.7965 ns | +| 16 byte string | 15.155 ns | 7.5796 ns | 3.2619 ns | **1.6262 ns** | 1.8011 ns | +| 24 byte string | 16.521 ns | 12.492 ns | 3.5424 ns | **1.6266 ns** | 2.8311 ns | +| 68 byte string | 24.598 ns | 50.715 ns | 5.8312 ns | **4.8282 ns** | 5.4824 ns | +| 132 byte string| 39.224 ns | 119.96 ns | 11.777 ns | **6.5087 ns** | 9.1459 ns | +|1024 byte string| 254.00 ns | 1087.3 ns | 156.41 ns | **25.402 ns** | 54.566 ns | + +* Fallback refers to the algorithm aHash would use if AES instructions are unavailable. +For reference a hash that does nothing (not even reads the input data takes) **0.520 ns**. So that represents the fastest +possible time. + +As you can see above aHash like `FxHash` provides a large speedup over `SipHash-1-3` which is already nearly twice as fast as `SipHash-2-4`. + +Rust's HashMap by default uses `SipHash-1-3` because faster hash functions such as `FxHash` are predictable and vulnerable to denial of +service attacks. While `aHash` has both very strong scrambling and very high performance. + +AHash performs well when dealing with large inputs because aHash reads 8 or 16 bytes at a time. (depending on availability of AES-NI) + +Because of this, and its optimized logic, `aHash` is able to outperform `FxHash` with strings. +It also provides especially good performance dealing with unaligned input. +(Notice the big performance gaps between 3 vs 4, 7 vs 8 and 15 vs 16 in `FxHash` above) + +### Which CPUs can use the hardware acceleration + +Hardware AES instructions are built into Intel processors built after 2010 and AMD processors after 2012. +It is also available on [many other CPUs](https://en.wikipedia.org/wiki/AES_instruction_set) should in eventually +be able to get aHash to work. However, only X86 and X86-64 are the only supported architectures at the moment, as currently +they are the only architectures for which Rust provides an intrinsic. + +aHash also uses `sse2` and `sse3` instructions. X86 processors that have `aesni` also have these instruction sets. diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..cba2010 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Tom Kaitchuck + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..18c421d --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# aHash ![Build Status](https://img.shields.io/github/actions/workflow/status/tkaitchuck/aHash/rust.yml?branch=master) ![Licence](https://img.shields.io/crates/l/ahash) ![Downloads](https://img.shields.io/crates/d/ahash) + +AHash is the [fastest](https://github.com/tkaitchuck/aHash/blob/master/compare/readme.md#Speed), +[DOS resistant hash](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks) currently available in Rust. +AHash is intended *exclusively* for use in in-memory hashmaps. + +AHash's output is of [high quality](https://github.com/tkaitchuck/aHash/blob/master/compare/readme.md#Quality) but aHash is **not** a cryptographically secure hash. + +## Design + +Because AHash is a keyed hash, each map will produce completely different hashes, which cannot be predicted without knowing the keys. +[This prevents DOS attacks where an attacker sends a large number of items whose hashes collide that get used as keys in a hashmap.](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks) + +This also avoids [accidentally quadratic behavior by reading from one map and writing to another.](https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion) + +## Goals and Non-Goals + +AHash does *not* have a fixed standard for its output. This allows it to improve over time. For example, +if any faster algorithm is found, aHash will be updated to incorporate the technique. +Similarly, should any flaw in aHash's DOS resistance be found, aHash will be changed to correct the flaw. + +Because it does not have a fixed standard, different computers or computers on different versions of the code will observe different hash values. +As such, aHash is not recommended for use other than in-memory maps. Specifically, aHash is not intended for network use or in applications which persist hashed values. +(In these cases `HighwayHash` would be a better choice) + +Additionally, aHash is not intended to be cryptographically secure and should not be used as a MAC, or anywhere which requires a cryptographically secure hash. +(In these cases `SHA-3` would be a better choice) + +## Usage + +AHash is a drop in replacement for the default implementation of the `Hasher` trait. To construct a `HashMap` using aHash +as its hasher do the following: + +```rust +use ahash::{AHasher, RandomState}; +use std::collections::HashMap; + +let mut map: HashMap<i32, i32, RandomState> = HashMap::default(); +map.insert(12, 34); +``` +For convenience, wrappers called `AHashMap` and `AHashSet` are also provided. +These do the same thing with slightly less typing. +```rust +use ahash::AHashMap; + +let mut map: AHashMap<i32, i32> = AHashMap::new(); +map.insert(12, 34); +map.insert(56, 78); +``` + +## Flags + +The aHash package has the following flags: +* `std`: This enables features which require the standard library. (On by default) This includes providing the utility classes `AHashMap` and `AHashSet`. +* `serde`: Enables `serde` support for the utility classes `AHashMap` and `AHashSet`. +* `runtime-rng`: To obtain a seed for Hashers will obtain randomness from the operating system. (On by default) +This is done using the [getrandom](https://github.com/rust-random/getrandom) crate. +* `compile-time-rng`: For OS targets without access to a random number generator, `compile-time-rng` provides an alternative. +If `getrandom` is unavailable and `compile-time-rng` is enabled, aHash will generate random numbers at compile time and embed them in the binary. +This allows for DOS resistance even if there is no random number generator available at runtime (assuming the compiled binary is not public). +This makes the binary non-deterministic. (If non-determinism is a problem see [constrandom's documentation](https://github.com/tkaitchuck/constrandom#deterministic-builds)) + +If both `runtime-rng` and `compile-time-rng` are enabled the `runtime-rng` will take precedence and `compile-time-rng` will do nothing. +If neither flag is set, seeds can be supplied by the application. [Multiple apis](https://docs.rs/ahash/latest/ahash/random_state/struct.RandomState.html) +are available to do this. + +## Comparison with other hashers + +A full comparison with other hashing algorithms can be found [here](https://github.com/tkaitchuck/aHash/blob/master/compare/readme.md) + +![Hasher performance](https://docs.google.com/spreadsheets/d/e/2PACX-1vSK7Li2nS-Bur9arAYF9IfT37MP-ohAe1v19lZu5fd9MajI1fSveLAQZyEie4Ea9k5-SWHTff7nL2DW/pubchart?oid=1323618938&format=image) + +For a more representative performance comparison which includes the overhead of using a HashMap, see [HashBrown's benchmarks](https://github.com/rust-lang/hashbrown#performance) +as HashBrown now uses aHash as its hasher by default. + +## Hash quality + +AHash passes the full [SMHasher test suite](https://github.com/rurban/smhasher). + +The code to reproduce the result, and the full output [are checked into the repo](https://github.com/tkaitchuck/aHash/tree/master/smhasher). + +## Additional FAQ + +A separate FAQ document is maintained [here](https://github.com/tkaitchuck/aHash/blob/master/FAQ.md). +If you have questions not covered there, open an issue [here](https://github.com/tkaitchuck/aHash/issues). + +## License + +Licensed under either of: + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +## Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. + + + + + + + + diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..0c5b769 --- /dev/null +++ b/build.rs @@ -0,0 +1,23 @@ +#![deny(warnings)] + +use std::env; + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + if let Some(true) = version_check::supports_feature("specialize") { + println!("cargo:rustc-cfg=feature=\"specialize\""); + } + if let Some(true) = version_check::supports_feature("stdsimd") { + println!("cargo:rustc-cfg=feature=\"stdsimd\""); + } + let arch = env::var("CARGO_CFG_TARGET_ARCH").expect("CARGO_CFG_TARGET_ARCH was not set"); + if arch.eq_ignore_ascii_case("x86_64") + || arch.eq_ignore_ascii_case("aarch64") + || arch.eq_ignore_ascii_case("mips64") + || arch.eq_ignore_ascii_case("powerpc64") + || arch.eq_ignore_ascii_case("riscv64gc") + || arch.eq_ignore_ascii_case("s390x") + { + println!("cargo:rustc-cfg=feature=\"folded_multiply\""); + } +} diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..7530651 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +max_width = 120 diff --git a/src/aes_hash.rs b/src/aes_hash.rs new file mode 100644 index 0000000..702044e --- /dev/null +++ b/src/aes_hash.rs @@ -0,0 +1,433 @@ +use crate::convert::*; +use crate::operations::*; +use crate::random_state::PI; +use crate::RandomState; +use core::hash::Hasher; + +/// A `Hasher` for hashing an arbitrary stream of bytes. +/// +/// Instances of [`AHasher`] represent state that is updated while hashing data. +/// +/// Each method updates the internal state based on the new data provided. Once +/// all of the data has been provided, the resulting hash can be obtained by calling +/// `finish()` +/// +/// [Clone] is also provided in case you wish to calculate hashes for two different items that +/// start with the same data. +/// +#[derive(Debug, Clone)] +pub struct AHasher { + enc: u128, + sum: u128, + key: u128, +} + +impl AHasher { + /// Creates a new hasher keyed to the provided keys. + /// + /// Normally hashers are created via `AHasher::default()` for fixed keys or `RandomState::new()` for randomly + /// generated keys and `RandomState::with_seeds(a,b)` for seeds that are set and can be reused. All of these work at + /// map creation time (and hence don't have any overhead on a per-item bais). + /// + /// This method directly creates the hasher instance and performs no transformation on the provided seeds. This may + /// be useful where a HashBuilder is not desired, such as for testing purposes. + /// + /// # Example + /// + /// ``` + /// use std::hash::Hasher; + /// use ahash::AHasher; + /// + /// let mut hasher = AHasher::new_with_keys(1234, 5678); + /// + /// hasher.write_u32(1989); + /// hasher.write_u8(11); + /// hasher.write_u8(9); + /// hasher.write(b"Huh?"); + /// + /// println!("Hash is {:x}!", hasher.finish()); + /// ``` + #[inline] + pub(crate) fn new_with_keys(key1: u128, key2: u128) -> Self { + let pi: [u128; 2] = PI.convert(); + let key1 = key1 ^ pi[0]; + let key2 = key2 ^ pi[1]; + Self { + enc: key1, + sum: key2, + key: key1 ^ key2, + } + } + + #[allow(unused)] // False positive + pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { + Self { + enc: key1, + sum: key2, + key: key1 ^ key2, + } + } + + #[inline] + pub(crate) fn from_random_state(rand_state: &RandomState) -> Self { + let key1 = [rand_state.k0, rand_state.k1].convert(); + let key2 = [rand_state.k2, rand_state.k3].convert(); + Self { + enc: key1, + sum: key2, + key: key1 ^ key2, + } + } + + #[inline(always)] + fn hash_in(&mut self, new_value: u128) { + self.enc = aesenc(self.enc, new_value); + self.sum = shuffle_and_add(self.sum, new_value); + } + + #[inline(always)] + fn hash_in_2(&mut self, v1: u128, v2: u128) { + self.enc = aesenc(self.enc, v1); + self.sum = shuffle_and_add(self.sum, v1); + self.enc = aesenc(self.enc, v2); + self.sum = shuffle_and_add(self.sum, v2); + } + + #[inline] + #[cfg(feature = "specialize")] + fn short_finish(&self) -> u64 { + let combined = aesdec(self.sum, self.enc); + let result: [u64; 2] = aesenc(combined, combined).convert(); + result[0] + } +} + +/// Provides [Hasher] methods to hash all of the primitive types. +/// +/// [Hasher]: core::hash::Hasher +impl Hasher for AHasher { + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.hash_in(i); + } + + #[inline] + #[cfg(any( + target_pointer_width = "64", + target_pointer_width = "32", + target_pointer_width = "16" + ))] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + #[inline] + #[cfg(target_pointer_width = "128")] + fn write_usize(&mut self, i: usize) { + self.write_u128(i as u128); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.write_u128(i as u128); + } + + #[inline] + #[allow(clippy::collapsible_if)] + fn write(&mut self, input: &[u8]) { + let mut data = input; + let length = data.len(); + add_in_length(&mut self.enc, length as u64); + + //A 'binary search' on sizes reduces the number of comparisons. + if data.len() <= 8 { + let value = read_small(data); + self.hash_in(value.convert()); + } else { + if data.len() > 32 { + if data.len() > 64 { + let tail = data.read_last_u128x4(); + let mut current: [u128; 4] = [self.key; 4]; + current[0] = aesenc(current[0], tail[0]); + current[1] = aesenc(current[1], tail[1]); + current[2] = aesenc(current[2], tail[2]); + current[3] = aesenc(current[3], tail[3]); + let mut sum: [u128; 2] = [self.key, self.key]; + sum[0] = add_by_64s(sum[0].convert(), tail[0].convert()).convert(); + sum[1] = add_by_64s(sum[1].convert(), tail[1].convert()).convert(); + sum[0] = shuffle_and_add(sum[0], tail[2]); + sum[1] = shuffle_and_add(sum[1], tail[3]); + while data.len() > 64 { + let (blocks, rest) = data.read_u128x4(); + current[0] = aesenc(current[0], blocks[0]); + current[1] = aesenc(current[1], blocks[1]); + current[2] = aesenc(current[2], blocks[2]); + current[3] = aesenc(current[3], blocks[3]); + sum[0] = shuffle_and_add(sum[0], blocks[0]); + sum[1] = shuffle_and_add(sum[1], blocks[1]); + sum[0] = shuffle_and_add(sum[0], blocks[2]); + sum[1] = shuffle_and_add(sum[1], blocks[3]); + data = rest; + } + self.hash_in_2(aesenc(current[0], current[1]), aesenc(current[2], current[3])); + self.hash_in(add_by_64s(sum[0].convert(), sum[1].convert()).convert()); + } else { + //len 33-64 + let (head, _) = data.read_u128x2(); + let tail = data.read_last_u128x2(); + self.hash_in_2(head[0], head[1]); + self.hash_in_2(tail[0], tail[1]); + } + } else { + if data.len() > 16 { + //len 17-32 + self.hash_in_2(data.read_u128().0, data.read_last_u128()); + } else { + //len 9-16 + let value: [u64; 2] = [data.read_u64().0, data.read_last_u64()]; + self.hash_in(value.convert()); + } + } + } + } + #[inline] + fn finish(&self) -> u64 { + let combined = aesdec(self.sum, self.enc); + let result: [u64; 2] = aesenc(aesenc(combined, self.key), combined).convert(); + result[0] + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherU64 { + pub(crate) buffer: u64, + pub(crate) pad: u64, +} + +/// A specialized hasher for only primitives under 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherU64 { + #[inline] + fn finish(&self) -> u64 { + let rot = (self.pad & 63) as u32; + self.buffer.rotate_left(rot) + } + + #[inline] + fn write(&mut self, _bytes: &[u8]) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); + } + + #[inline] + fn write_u128(&mut self, _i: u128) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_usize(&mut self, _i: usize) { + unreachable!("Specialized hasher was called with a different type of object") + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherFixed(pub AHasher); + +/// A specialized hasher for fixed size primitives larger than 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherFixed { + #[inline] + fn finish(&self) -> u64 { + self.0.short_finish() + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + self.0.write(bytes) + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.0.write_usize(i); + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherStr(pub AHasher); + +/// A specialized hasher for strings +/// Note that the other types don't panic because the hash impl for String tacks on an unneeded call. (As does vec) +#[cfg(feature = "specialize")] +impl Hasher for AHasherStr { + #[inline] + fn finish(&self) -> u64 { + let result: [u64; 2] = self.0.enc.convert(); + result[0] + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + if bytes.len() > 8 { + self.0.write(bytes); + self.0.enc = aesdec(self.0.sum, self.0.enc); + self.0.enc = aesenc(aesenc(self.0.enc, self.0.key), self.0.enc); + } else { + add_in_length(&mut self.0.enc, bytes.len() as u64); + + let value = read_small(bytes).convert(); + self.0.sum = shuffle_and_add(self.0.sum, value); + self.0.enc = aesdec(self.0.sum, self.0.enc); + self.0.enc = aesenc(aesenc(self.0.enc, self.0.key), self.0.enc); + } + } + + #[inline] + fn write_u8(&mut self, _i: u8) {} + + #[inline] + fn write_u16(&mut self, _i: u16) {} + + #[inline] + fn write_u32(&mut self, _i: u32) {} + + #[inline] + fn write_u64(&mut self, _i: u64) {} + + #[inline] + fn write_u128(&mut self, _i: u128) {} + + #[inline] + fn write_usize(&mut self, _i: usize) {} +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::convert::Convert; + use crate::operations::aesenc; + use crate::RandomState; + use std::hash::{BuildHasher, Hasher}; + #[test] + fn test_sanity() { + let mut hasher = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + hasher.write_u64(0); + let h1 = hasher.finish(); + hasher.write(&[1, 0, 0, 0, 0, 0, 0, 0]); + let h2 = hasher.finish(); + assert_ne!(h1, h2); + } + + #[cfg(feature = "compile-time-rng")] + #[test] + fn test_builder() { + use std::collections::HashMap; + use std::hash::BuildHasherDefault; + + let mut map = HashMap::<u32, u64, BuildHasherDefault<AHasher>>::default(); + map.insert(1, 3); + } + + #[cfg(feature = "compile-time-rng")] + #[test] + fn test_default() { + let hasher_a = AHasher::default(); + let a_enc: [u64; 2] = hasher_a.enc.convert(); + let a_sum: [u64; 2] = hasher_a.sum.convert(); + assert_ne!(0, a_enc[0]); + assert_ne!(0, a_enc[1]); + assert_ne!(0, a_sum[0]); + assert_ne!(0, a_sum[1]); + assert_ne!(a_enc[0], a_enc[1]); + assert_ne!(a_sum[0], a_sum[1]); + assert_ne!(a_enc[0], a_sum[0]); + assert_ne!(a_enc[1], a_sum[1]); + let hasher_b = AHasher::default(); + let b_enc: [u64; 2] = hasher_b.enc.convert(); + let b_sum: [u64; 2] = hasher_b.sum.convert(); + assert_eq!(a_enc[0], b_enc[0]); + assert_eq!(a_enc[1], b_enc[1]); + assert_eq!(a_sum[0], b_sum[0]); + assert_eq!(a_sum[1], b_sum[1]); + } + + #[test] + fn test_hash() { + let mut result: [u64; 2] = [0x6c62272e07bb0142, 0x62b821756295c58d]; + let value: [u64; 2] = [1 << 32, 0xFEDCBA9876543210]; + result = aesenc(value.convert(), result.convert()).convert(); + result = aesenc(result.convert(), result.convert()).convert(); + let mut result2: [u64; 2] = [0x6c62272e07bb0142, 0x62b821756295c58d]; + let value2: [u64; 2] = [1, 0xFEDCBA9876543210]; + result2 = aesenc(value2.convert(), result2.convert()).convert(); + result2 = aesenc(result2.convert(), result.convert()).convert(); + let result: [u8; 16] = result.convert(); + let result2: [u8; 16] = result2.convert(); + assert_ne!(hex::encode(result), hex::encode(result2)); + } + + #[test] + fn test_conversion() { + let input: &[u8] = "dddddddd".as_bytes(); + let bytes: u64 = as_array!(input, 8).convert(); + assert_eq!(bytes, 0x6464646464646464); + } +} diff --git a/src/convert.rs b/src/convert.rs new file mode 100644 index 0000000..fc47baa --- /dev/null +++ b/src/convert.rs @@ -0,0 +1,163 @@ +pub(crate) trait Convert<To> { + fn convert(self) -> To; +} + +macro_rules! convert { + ($a:ty, $b:ty) => { + impl Convert<$b> for $a { + #[inline(always)] + fn convert(self) -> $b { + unsafe { core::mem::transmute::<$a, $b>(self) } + } + } + impl Convert<$a> for $b { + #[inline(always)] + fn convert(self) -> $a { + unsafe { core::mem::transmute::<$b, $a>(self) } + } + } + }; +} + +convert!([u128; 4], [u64; 8]); +convert!([u128; 4], [u32; 16]); +convert!([u128; 4], [u16; 32]); +convert!([u128; 4], [u8; 64]); +convert!([u128; 2], [u64; 4]); +convert!([u128; 2], [u32; 8]); +convert!([u128; 2], [u16; 16]); +convert!([u128; 2], [u8; 32]); +convert!(u128, [u64; 2]); +convert!(u128, [u32; 4]); +convert!(u128, [u16; 8]); +convert!(u128, [u8; 16]); +convert!([u64; 8], [u32; 16]); +convert!([u64; 8], [u16; 32]); +convert!([u64; 8], [u8; 64]); +convert!([u64; 4], [u32; 8]); +convert!([u64; 4], [u16; 16]); +convert!([u64; 4], [u8; 32]); +convert!([u64; 2], [u32; 4]); +convert!([u64; 2], [u16; 8]); +convert!([u64; 2], [u8; 16]); +convert!([u32; 4], [u16; 8]); +convert!([u32; 4], [u8; 16]); +convert!([u16; 8], [u8; 16]); +convert!(u64, [u32; 2]); +convert!(u64, [u16; 4]); +convert!(u64, [u8; 8]); +convert!([u32; 2], [u16; 4]); +convert!([u32; 2], [u8; 8]); +convert!(u32, [u16; 2]); +convert!(u32, [u8; 4]); +convert!([u16; 2], [u8; 4]); +convert!(u16, [u8; 2]); +convert!([[u64; 4]; 2], [u8; 64]); + +convert!([f64; 2], [u8; 16]); +convert!([f32; 4], [u8; 16]); +convert!(f64, [u8; 8]); +convert!([f32; 2], [u8; 8]); +convert!(f32, [u8; 4]); + +macro_rules! as_array { + ($input:expr, $len:expr) => {{ + { + #[inline(always)] + fn as_array<T>(slice: &[T]) -> &[T; $len] { + assert_eq!(slice.len(), $len); + unsafe { &*(slice.as_ptr() as *const [_; $len]) } + } + as_array($input) + } + }}; +} + +pub(crate) trait ReadFromSlice { + fn read_u16(&self) -> (u16, &[u8]); + fn read_u32(&self) -> (u32, &[u8]); + fn read_u64(&self) -> (u64, &[u8]); + fn read_u128(&self) -> (u128, &[u8]); + fn read_u128x2(&self) -> ([u128; 2], &[u8]); + fn read_u128x4(&self) -> ([u128; 4], &[u8]); + fn read_last_u16(&self) -> u16; + fn read_last_u32(&self) -> u32; + fn read_last_u64(&self) -> u64; + fn read_last_u128(&self) -> u128; + fn read_last_u128x2(&self) -> [u128; 2]; + fn read_last_u128x4(&self) -> [u128; 4]; +} + +impl ReadFromSlice for [u8] { + #[inline(always)] + fn read_u16(&self) -> (u16, &[u8]) { + let (value, rest) = self.split_at(2); + (as_array!(value, 2).convert(), rest) + } + + #[inline(always)] + fn read_u32(&self) -> (u32, &[u8]) { + let (value, rest) = self.split_at(4); + (as_array!(value, 4).convert(), rest) + } + + #[inline(always)] + fn read_u64(&self) -> (u64, &[u8]) { + let (value, rest) = self.split_at(8); + (as_array!(value, 8).convert(), rest) + } + + #[inline(always)] + fn read_u128(&self) -> (u128, &[u8]) { + let (value, rest) = self.split_at(16); + (as_array!(value, 16).convert(), rest) + } + + #[inline(always)] + fn read_u128x2(&self) -> ([u128; 2], &[u8]) { + let (value, rest) = self.split_at(32); + (as_array!(value, 32).convert(), rest) + } + + #[inline(always)] + fn read_u128x4(&self) -> ([u128; 4], &[u8]) { + let (value, rest) = self.split_at(64); + (as_array!(value, 64).convert(), rest) + } + + #[inline(always)] + fn read_last_u16(&self) -> u16 { + let (_, value) = self.split_at(self.len() - 2); + as_array!(value, 2).convert() + } + + #[inline(always)] + fn read_last_u32(&self) -> u32 { + let (_, value) = self.split_at(self.len() - 4); + as_array!(value, 4).convert() + } + + #[inline(always)] + fn read_last_u64(&self) -> u64 { + let (_, value) = self.split_at(self.len() - 8); + as_array!(value, 8).convert() + } + + #[inline(always)] + fn read_last_u128(&self) -> u128 { + let (_, value) = self.split_at(self.len() - 16); + as_array!(value, 16).convert() + } + + #[inline(always)] + fn read_last_u128x2(&self) -> [u128; 2] { + let (_, value) = self.split_at(self.len() - 32); + as_array!(value, 32).convert() + } + + #[inline(always)] + fn read_last_u128x4(&self) -> [u128; 4] { + let (_, value) = self.split_at(self.len() - 64); + as_array!(value, 64).convert() + } +} diff --git a/src/fallback_hash.rs b/src/fallback_hash.rs new file mode 100644 index 0000000..f78074d --- /dev/null +++ b/src/fallback_hash.rs @@ -0,0 +1,368 @@ +use crate::convert::*; +use crate::operations::folded_multiply; +use crate::operations::read_small; +use crate::operations::MULTIPLE; +use crate::random_state::PI; +use crate::RandomState; +use core::hash::Hasher; + +const ROT: u32 = 23; //17 + +/// A `Hasher` for hashing an arbitrary stream of bytes. +/// +/// Instances of [`AHasher`] represent state that is updated while hashing data. +/// +/// Each method updates the internal state based on the new data provided. Once +/// all of the data has been provided, the resulting hash can be obtained by calling +/// `finish()` +/// +/// [Clone] is also provided in case you wish to calculate hashes for two different items that +/// start with the same data. +/// +#[derive(Debug, Clone)] +pub struct AHasher { + buffer: u64, + pad: u64, + extra_keys: [u64; 2], +} + +impl AHasher { + /// Creates a new hasher keyed to the provided key. + #[inline] + #[allow(dead_code)] // Is not called if non-fallback hash is used. + pub(crate) fn new_with_keys(key1: u128, key2: u128) -> AHasher { + let pi: [u128; 2] = PI.convert(); + let key1: [u64; 2] = (key1 ^ pi[0]).convert(); + let key2: [u64; 2] = (key2 ^ pi[1]).convert(); + AHasher { + buffer: key1[0], + pad: key1[1], + extra_keys: key2, + } + } + + #[allow(unused)] // False positive + pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { + let key1: [u64; 2] = key1.convert(); + let key2: [u64; 2] = key2.convert(); + Self { + buffer: key1[0], + pad: key1[1], + extra_keys: key2, + } + } + + #[inline] + #[allow(dead_code)] // Is not called if non-fallback hash is used. + pub(crate) fn from_random_state(rand_state: &RandomState) -> AHasher { + AHasher { + buffer: rand_state.k0, + pad: rand_state.k1, + extra_keys: [rand_state.k2, rand_state.k3], + } + } + + /// This update function has the goal of updating the buffer with a single multiply + /// FxHash does this but is vulnerable to attack. To avoid this input needs to be masked to with an + /// unpredictable value. Other hashes such as murmurhash have taken this approach but were found vulnerable + /// to attack. The attack was based on the idea of reversing the pre-mixing (Which is necessarily + /// reversible otherwise bits would be lost) then placing a difference in the highest bit before the + /// multiply used to mix the data. Because a multiply can never affect the bits to the right of it, a + /// subsequent update that also differed in this bit could result in a predictable collision. + /// + /// This version avoids this vulnerability while still only using a single multiply. It takes advantage + /// of the fact that when a 64 bit multiply is performed the upper 64 bits are usually computed and thrown + /// away. Instead it creates two 128 bit values where the upper 64 bits are zeros and multiplies them. + /// (The compiler is smart enough to turn this into a 64 bit multiplication in the assembly) + /// Then the upper bits are xored with the lower bits to produce a single 64 bit result. + /// + /// To understand why this is a good scrambling function it helps to understand multiply-with-carry PRNGs: + /// https://en.wikipedia.org/wiki/Multiply-with-carry_pseudorandom_number_generator + /// If the multiple is chosen well, this creates a long period, decent quality PRNG. + /// Notice that this function is equivalent to this except the `buffer`/`state` is being xored with each + /// new block of data. In the event that data is all zeros, it is exactly equivalent to a MWC PRNG. + /// + /// This is impervious to attack because every bit buffer at the end is dependent on every bit in + /// `new_data ^ buffer`. For example suppose two inputs differed in only the 5th bit. Then when the + /// multiplication is performed the `result` will differ in bits 5-69. More specifically it will differ by + /// 2^5 * MULTIPLE. However in the next step bits 65-128 are turned into a separate 64 bit value. So the + /// differing bits will be in the lower 6 bits of this value. The two intermediate values that differ in + /// bits 5-63 and in bits 0-5 respectively get added together. Producing an output that differs in every + /// bit. The addition carries in the multiplication and at the end additionally mean that the even if an + /// attacker somehow knew part of (but not all) the contents of the buffer before hand, + /// they would not be able to predict any of the bits in the buffer at the end. + #[inline(always)] + fn update(&mut self, new_data: u64) { + self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE); + } + + /// Similar to the above this function performs an update using a "folded multiply". + /// However it takes in 128 bits of data instead of 64. Both halves must be masked. + /// + /// This makes it impossible for an attacker to place a single bit difference between + /// two blocks so as to cancel each other. + /// + /// However this is not sufficient. to prevent (a,b) from hashing the same as (b,a) the buffer itself must + /// be updated between calls in a way that does not commute. To achieve this XOR and Rotate are used. + /// Add followed by xor is not the same as xor followed by add, and rotate ensures that the same out bits + /// can't be changed by the same set of input bits. To cancel this sequence with subsequent input would require + /// knowing the keys. + #[inline(always)] + fn large_update(&mut self, new_data: u128) { + let block: [u64; 2] = new_data.convert(); + let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]); + self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT); + } + + #[inline] + #[cfg(feature = "specialize")] + fn short_finish(&self) -> u64 { + self.buffer.wrapping_add(self.pad) + } +} + +/// Provides [Hasher] methods to hash all of the primitive types. +/// +/// [Hasher]: core::hash::Hasher +impl Hasher for AHasher { + #[inline] + fn write_u8(&mut self, i: u8) { + self.update(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.update(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.update(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.update(i as u64); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.large_update(i); + } + + #[inline] + #[cfg(any( + target_pointer_width = "64", + target_pointer_width = "32", + target_pointer_width = "16" + ))] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + #[inline] + #[cfg(target_pointer_width = "128")] + fn write_usize(&mut self, i: usize) { + self.write_u128(i as u128); + } + + #[inline] + #[allow(clippy::collapsible_if)] + fn write(&mut self, input: &[u8]) { + let mut data = input; + let length = data.len() as u64; + //Needs to be an add rather than an xor because otherwise it could be canceled with carefully formed input. + self.buffer = self.buffer.wrapping_add(length).wrapping_mul(MULTIPLE); + //A 'binary search' on sizes reduces the number of comparisons. + if data.len() > 8 { + if data.len() > 16 { + let tail = data.read_last_u128(); + self.large_update(tail); + while data.len() > 16 { + let (block, rest) = data.read_u128(); + self.large_update(block); + data = rest; + } + } else { + self.large_update([data.read_u64().0, data.read_last_u64()].convert()); + } + } else { + let value = read_small(data); + self.large_update(value.convert()); + } + } + + #[inline] + fn finish(&self) -> u64 { + let rot = (self.buffer & 63) as u32; + folded_multiply(self.buffer, self.pad).rotate_left(rot) + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherU64 { + pub(crate) buffer: u64, + pub(crate) pad: u64, +} + +/// A specialized hasher for only primitives under 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherU64 { + #[inline] + fn finish(&self) -> u64 { + let rot = (self.pad & 63) as u32; + self.buffer.rotate_left(rot) + } + + #[inline] + fn write(&mut self, _bytes: &[u8]) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); + } + + #[inline] + fn write_u128(&mut self, _i: u128) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_usize(&mut self, _i: usize) { + unreachable!("Specialized hasher was called with a different type of object") + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherFixed(pub AHasher); + +/// A specialized hasher for fixed size primitives larger than 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherFixed { + #[inline] + fn finish(&self) -> u64 { + self.0.short_finish() + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + self.0.write(bytes) + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.0.write_usize(i); + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherStr(pub AHasher); + +/// A specialized hasher for a single string +/// Note that the other types don't panic because the hash impl for String tacks on an unneeded call. (As does vec) +#[cfg(feature = "specialize")] +impl Hasher for AHasherStr { + #[inline] + fn finish(&self) -> u64 { + self.0.finish() + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + if bytes.len() > 8 { + self.0.write(bytes) + } else { + let value = read_small(bytes); + self.0.buffer = folded_multiply(value[0] ^ self.0.buffer, value[1] ^ self.0.extra_keys[1]); + self.0.pad = self.0.pad.wrapping_add(bytes.len() as u64); + } + } + + #[inline] + fn write_u8(&mut self, _i: u8) {} + + #[inline] + fn write_u16(&mut self, _i: u16) {} + + #[inline] + fn write_u32(&mut self, _i: u32) {} + + #[inline] + fn write_u64(&mut self, _i: u64) {} + + #[inline] + fn write_u128(&mut self, _i: u128) {} + + #[inline] + fn write_usize(&mut self, _i: usize) {} +} + +#[cfg(test)] +mod tests { + use crate::convert::Convert; + use crate::fallback_hash::*; + + #[test] + fn test_hash() { + let mut hasher = AHasher::new_with_keys(0, 0); + let value: u64 = 1 << 32; + hasher.update(value); + let result = hasher.buffer; + let mut hasher = AHasher::new_with_keys(0, 0); + let value2: u64 = 1; + hasher.update(value2); + let result2 = hasher.buffer; + let result: [u8; 8] = result.convert(); + let result2: [u8; 8] = result2.convert(); + assert_ne!(hex::encode(result), hex::encode(result2)); + } + + #[test] + fn test_conversion() { + let input: &[u8] = "dddddddd".as_bytes(); + let bytes: u64 = as_array!(input, 8).convert(); + assert_eq!(bytes, 0x6464646464646464); + } +} diff --git a/src/hash_map.rs b/src/hash_map.rs new file mode 100644 index 0000000..2b6fbdc --- /dev/null +++ b/src/hash_map.rs @@ -0,0 +1,501 @@ +use std::borrow::Borrow; +use std::collections::hash_map::{IntoKeys, IntoValues}; +use std::collections::{hash_map, HashMap}; +use std::fmt::{self, Debug}; +use std::hash::{BuildHasher, Hash}; +use std::iter::FromIterator; +use std::ops::{Deref, DerefMut, Index}; +use std::panic::UnwindSafe; + +#[cfg(feature = "serde")] +use serde::{ + de::{Deserialize, Deserializer}, + ser::{Serialize, Serializer}, +}; + +use crate::RandomState; + +/// A [`HashMap`](std::collections::HashMap) using [`RandomState`](crate::RandomState) to hash the items. +/// (Requires the `std` feature to be enabled.) +#[derive(Clone)] +pub struct AHashMap<K, V, S = crate::RandomState>(HashMap<K, V, S>); + +impl<K, V> From<HashMap<K, V, crate::RandomState>> for AHashMap<K, V> { + fn from(item: HashMap<K, V, crate::RandomState>) -> Self { + AHashMap(item) + } +} + +impl<K, V, const N: usize> From<[(K, V); N]> for AHashMap<K, V> +where + K: Eq + Hash, +{ + /// # Examples + /// + /// ``` + /// use ahash::AHashMap; + /// + /// let map1 = AHashMap::from([(1, 2), (3, 4)]); + /// let map2: AHashMap<_, _> = [(1, 2), (3, 4)].into(); + /// assert_eq!(map1, map2); + /// ``` + fn from(arr: [(K, V); N]) -> Self { + Self::from_iter(arr) + } +} + +impl<K, V> Into<HashMap<K, V, crate::RandomState>> for AHashMap<K, V> { + fn into(self) -> HashMap<K, V, crate::RandomState> { + self.0 + } +} + +impl<K, V> AHashMap<K, V, RandomState> { + /// This crates a hashmap using [RandomState::new] which obtains its keys from [RandomSource]. + /// See the documentation in [RandomSource] for notes about key strength. + pub fn new() -> Self { + AHashMap(HashMap::with_hasher(RandomState::new())) + } + + /// This crates a hashmap with the specified capacity using [RandomState::new]. + /// See the documentation in [RandomSource] for notes about key strength. + pub fn with_capacity(capacity: usize) -> Self { + AHashMap(HashMap::with_capacity_and_hasher(capacity, RandomState::new())) + } +} + +impl<K, V, S> AHashMap<K, V, S> +where + S: BuildHasher, +{ + pub fn with_hasher(hash_builder: S) -> Self { + AHashMap(HashMap::with_hasher(hash_builder)) + } + + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { + AHashMap(HashMap::with_capacity_and_hasher(capacity, hash_builder)) + } +} + +impl<K, V, S> AHashMap<K, V, S> +where + K: Hash + Eq, + S: BuildHasher, +{ + /// Returns a reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.get(&1), Some(&"a")); + /// assert_eq!(map.get(&2), None); + /// ``` + #[inline] + pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.get(k) + } + + /// Returns the key-value pair corresponding to the supplied key. + /// + /// The supplied key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.get_key_value(&1), Some((&1, &"a"))); + /// assert_eq!(map.get_key_value(&2), None); + /// ``` + #[inline] + pub fn get_key_value<Q: ?Sized>(&self, k: &Q) -> Option<(&K, &V)> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.get_key_value(k) + } + + /// Returns a mutable reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// if let Some(x) = map.get_mut(&1) { + /// *x = "b"; + /// } + /// assert_eq!(map[&1], "b"); + /// ``` + #[inline] + pub fn get_mut<Q: ?Sized>(&mut self, k: &Q) -> Option<&mut V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.get_mut(k) + } + + /// Inserts a key-value pair into the map. + /// + /// If the map did not have this key present, [`None`] is returned. + /// + /// If the map did have this key present, the value is updated, and the old + /// value is returned. The key is not updated, though; this matters for + /// types that can be `==` without being identical. See the [module-level + /// documentation] for more. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.insert(37, "a"), None); + /// assert_eq!(map.is_empty(), false); + /// + /// map.insert(37, "b"); + /// assert_eq!(map.insert(37, "c"), Some("b")); + /// assert_eq!(map[&37], "c"); + /// ``` + #[inline] + pub fn insert(&mut self, k: K, v: V) -> Option<V> { + self.0.insert(k, v) + } + + /// Creates a consuming iterator visiting all the keys in arbitrary order. + /// The map cannot be used after calling this. + /// The iterator element type is `K`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let map = HashMap::from([ + /// ("a", 1), + /// ("b", 2), + /// ("c", 3), + /// ]); + /// + /// let mut vec: Vec<&str> = map.into_keys().collect(); + /// // The `IntoKeys` iterator produces keys in arbitrary order, so the + /// // keys must be sorted to test them against a sorted array. + /// vec.sort_unstable(); + /// assert_eq!(vec, ["a", "b", "c"]); + /// ``` + /// + /// # Performance + /// + /// In the current implementation, iterating over keys takes O(capacity) time + /// instead of O(len) because it internally visits empty buckets too. + #[inline] + pub fn into_keys(self) -> IntoKeys<K, V> { + self.0.into_keys() + } + + /// Creates a consuming iterator visiting all the values in arbitrary order. + /// The map cannot be used after calling this. + /// The iterator element type is `V`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let map = HashMap::from([ + /// ("a", 1), + /// ("b", 2), + /// ("c", 3), + /// ]); + /// + /// let mut vec: Vec<i32> = map.into_values().collect(); + /// // The `IntoValues` iterator produces values in arbitrary order, so + /// // the values must be sorted to test them against a sorted array. + /// vec.sort_unstable(); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + /// + /// # Performance + /// + /// In the current implementation, iterating over values takes O(capacity) time + /// instead of O(len) because it internally visits empty buckets too. + #[inline] + pub fn into_values(self) -> IntoValues<K, V> { + self.0.into_values() + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.remove(&1), Some("a")); + /// assert_eq!(map.remove(&1), None); + /// ``` + #[inline] + pub fn remove<Q: ?Sized>(&mut self, k: &Q) -> Option<V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.remove(k) + } +} + +impl<K, V, S> Deref for AHashMap<K, V, S> { + type Target = HashMap<K, V, S>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<K, V, S> DerefMut for AHashMap<K, V, S> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<K, V, S> UnwindSafe for AHashMap<K, V, S> +where + K: UnwindSafe, + V: UnwindSafe, +{ +} + +impl<K, V, S> PartialEq for AHashMap<K, V, S> +where + K: Eq + Hash, + V: PartialEq, + S: BuildHasher, +{ + fn eq(&self, other: &AHashMap<K, V, S>) -> bool { + self.0.eq(&other.0) + } +} + +impl<K, V, S> Eq for AHashMap<K, V, S> +where + K: Eq + Hash, + V: Eq, + S: BuildHasher, +{ +} + +impl<K, Q: ?Sized, V, S> Index<&Q> for AHashMap<K, V, S> +where + K: Eq + Hash + Borrow<Q>, + Q: Eq + Hash, + S: BuildHasher, +{ + type Output = V; + + /// Returns a reference to the value corresponding to the supplied key. + /// + /// # Panics + /// + /// Panics if the key is not present in the `HashMap`. + #[inline] + fn index(&self, key: &Q) -> &V { + self.0.index(key) + } +} + +impl<K, V, S> Debug for AHashMap<K, V, S> +where + K: Debug, + V: Debug, + S: BuildHasher, +{ + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(fmt) + } +} + +impl<K, V> FromIterator<(K, V)> for AHashMap<K, V, RandomState> +where + K: Eq + Hash, +{ + /// This crates a hashmap from the provided iterator using [RandomState::new]. + /// See the documentation in [RandomSource] for notes about key strength. + fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self { + let mut inner = HashMap::with_hasher(RandomState::new()); + inner.extend(iter); + AHashMap(inner) + } +} + +impl<'a, K, V, S> IntoIterator for &'a AHashMap<K, V, S> { + type Item = (&'a K, &'a V); + type IntoIter = hash_map::Iter<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + (&self.0).iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut AHashMap<K, V, S> { + type Item = (&'a K, &'a mut V); + type IntoIter = hash_map::IterMut<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + (&mut self.0).iter_mut() + } +} + +impl<K, V, S> IntoIterator for AHashMap<K, V, S> { + type Item = (K, V); + type IntoIter = hash_map::IntoIter<K, V>; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<K, V, S> Extend<(K, V)> for AHashMap<K, V, S> +where + K: Eq + Hash, + S: BuildHasher, +{ + #[inline] + fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) { + self.0.extend(iter) + } +} + +impl<'a, K, V, S> Extend<(&'a K, &'a V)> for AHashMap<K, V, S> +where + K: Eq + Hash + Copy + 'a, + V: Copy + 'a, + S: BuildHasher, +{ + #[inline] + fn extend<T: IntoIterator<Item = (&'a K, &'a V)>>(&mut self, iter: T) { + self.0.extend(iter) + } +} + +/// NOTE: For safety this trait impl is only available available if either of the flags `runtime-rng` (on by default) or +/// `compile-time-rng` are enabled. This is to prevent weakly keyed maps from being accidentally created. Instead one of +/// constructors for [RandomState] must be used. +#[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] +impl<K, V> Default for AHashMap<K, V, RandomState> { + #[inline] + fn default() -> AHashMap<K, V, RandomState> { + AHashMap(HashMap::default()) + } +} + +#[cfg(feature = "serde")] +impl<K, V> Serialize for AHashMap<K, V> +where + K: Serialize + Eq + Hash, + V: Serialize, +{ + fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + self.deref().serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, K, V> Deserialize<'de> for AHashMap<K, V> +where + K: Deserialize<'de> + Eq + Hash, + V: Deserialize<'de>, +{ + fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> { + let hash_map = HashMap::deserialize(deserializer); + hash_map.map(|hash_map| Self(hash_map)) + } + + fn deserialize_in_place<D: Deserializer<'de>>(deserializer: D, place: &mut Self) -> Result<(), D::Error> { + use serde::de::{MapAccess, Visitor}; + + struct MapInPlaceVisitor<'a, K: 'a, V: 'a>(&'a mut AHashMap<K, V>); + + impl<'a, 'de, K, V> Visitor<'de> for MapInPlaceVisitor<'a, K, V> + where + K: Deserialize<'de> + Eq + Hash, + V: Deserialize<'de>, + { + type Value = (); + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a map") + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: MapAccess<'de>, + { + self.0.clear(); + self.0.reserve(map.size_hint().unwrap_or(0).min(4096)); + + while let Some((key, value)) = map.next_entry()? { + self.0.insert(key, value); + } + + Ok(()) + } + } + + deserializer.deserialize_map(MapInPlaceVisitor(place)) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_borrow() { + let mut map: AHashMap<String, String> = AHashMap::new(); + map.insert("foo".to_string(), "Bar".to_string()); + map.insert("Bar".to_string(), map.get("foo").unwrap().to_owned()); + } + + #[cfg(feature = "serde")] + #[test] + fn test_serde() { + let mut map = AHashMap::new(); + map.insert("for".to_string(), 0); + map.insert("bar".to_string(), 1); + let mut serialization = serde_json::to_string(&map).unwrap(); + let mut deserialization: AHashMap<String, u64> = serde_json::from_str(&serialization).unwrap(); + assert_eq!(deserialization, map); + + map.insert("baz".to_string(), 2); + serialization = serde_json::to_string(&map).unwrap(); + let mut deserializer = serde_json::Deserializer::from_str(&serialization); + AHashMap::deserialize_in_place(&mut deserializer, &mut deserialization).unwrap(); + assert_eq!(deserialization, map); + } +} diff --git a/src/hash_quality_test.rs b/src/hash_quality_test.rs new file mode 100644 index 0000000..8f13d24 --- /dev/null +++ b/src/hash_quality_test.rs @@ -0,0 +1,504 @@ +use core::hash::{Hash, Hasher}; +use std::collections::HashMap; + +fn assert_sufficiently_different(a: u64, b: u64, tolerance: i32) { + let (same_byte_count, same_nibble_count) = count_same_bytes_and_nibbles(a, b); + assert!(same_byte_count <= tolerance, "{:x} vs {:x}: {:}", a, b, same_byte_count); + assert!( + same_nibble_count <= tolerance * 3, + "{:x} vs {:x}: {:}", + a, + b, + same_nibble_count + ); + let flipped_bits = (a ^ b).count_ones(); + assert!( + flipped_bits > 12 && flipped_bits < 52, + "{:x} and {:x}: {:}", + a, + b, + flipped_bits + ); + for rotate in 0..64 { + let flipped_bits2 = (a ^ (b.rotate_left(rotate))).count_ones(); + assert!( + flipped_bits2 > 10 && flipped_bits2 < 54, + "{:x} and {:x}: {:}", + a, + b.rotate_left(rotate), + flipped_bits2 + ); + } +} + +fn count_same_bytes_and_nibbles(a: u64, b: u64) -> (i32, i32) { + let mut same_byte_count = 0; + let mut same_nibble_count = 0; + for byte in 0..8 { + let ba = (a >> (8 * byte)) as u8; + let bb = (b >> (8 * byte)) as u8; + if ba == bb { + same_byte_count += 1; + } + if ba & 0xF0u8 == bb & 0xF0u8 { + same_nibble_count += 1; + } + if ba & 0x0Fu8 == bb & 0x0Fu8 { + same_nibble_count += 1; + } + } + (same_byte_count, same_nibble_count) +} + +fn gen_combinations(options: &[u32; 11], depth: u32, so_far: Vec<u32>, combinations: &mut Vec<Vec<u32>>) { + if depth == 0 { + return; + } + for option in options { + let mut next = so_far.clone(); + next.push(*option); + combinations.push(next.clone()); + gen_combinations(options, depth - 1, next, combinations); + } +} + +fn test_no_full_collisions<T: Hasher>(gen_hash: impl Fn() -> T) { + let options: [u32; 11] = [ + 0x00000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0xF0000000, + 1, 2, 4, 8, 15 + ]; + let mut combinations = Vec::new(); + gen_combinations(&options, 7, Vec::new(), &mut combinations); + let mut map: HashMap<u64, Vec<u8>> = HashMap::new(); + for combination in combinations { + let array = unsafe { + let (begin, middle, end) = combination.align_to::<u8>(); + assert_eq!(0, begin.len()); + assert_eq!(0, end.len()); + middle.to_vec() + }; + let mut hasher = gen_hash(); + hasher.write(&array); + let hash = hasher.finish(); + if let Some(value) = map.get(&hash) { + assert_eq!( + value, &array, + "Found a collision between {:x?} and {:x?}. Hash: {:x?}", + value, &array, &hash + ); + } else { + map.insert(hash, array); + } + } + assert_eq!(21435887, map.len()); //11^7 + 11^6 ... +} + +fn test_keys_change_output<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + let mut a = constructor(1, 1); + let mut b = constructor(1, 2); + let mut c = constructor(2, 1); + let mut d = constructor(2, 2); + "test".hash(&mut a); + "test".hash(&mut b); + "test".hash(&mut c); + "test".hash(&mut d); + assert_sufficiently_different(a.finish(), b.finish(), 1); + assert_sufficiently_different(a.finish(), c.finish(), 1); + assert_sufficiently_different(a.finish(), d.finish(), 1); + assert_sufficiently_different(b.finish(), c.finish(), 1); + assert_sufficiently_different(b.finish(), d.finish(), 1); + assert_sufficiently_different(c.finish(), d.finish(), 1); +} + +fn test_input_affect_every_byte<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + let base = hash_with(&0, constructor(0, 0)); + for shift in 0..16 { + let mut alternitives = vec![]; + for v in 0..256 { + let input = (v as u128) << (shift * 8); + let hasher = constructor(0, 0); + alternitives.push(hash_with(&input, hasher)); + } + assert_each_byte_differs(shift, base, alternitives); + } +} + +///Ensures that for every bit in the output there is some value for each byte in the key that flips it. +fn test_keys_affect_every_byte<H: Hash, T: Hasher>(item: H, constructor: impl Fn(u128, u128) -> T) { + let base = hash_with(&item, constructor(0, 0)); + for shift in 0..16 { + let mut alternitives1 = vec![]; + let mut alternitives2 = vec![]; + for v in 0..256 { + let input = (v as u128) << (shift * 8); + let hasher1 = constructor(input, 0); + let hasher2 = constructor(0, input); + let h1 = hash_with(&item, hasher1); + let h2 = hash_with(&item, hasher2); + alternitives1.push(h1); + alternitives2.push(h2); + } + assert_each_byte_differs(shift, base, alternitives1); + assert_each_byte_differs(shift, base, alternitives2); + } +} + +fn assert_each_byte_differs(num: u64, base: u64, alternitives: Vec<u64>) { + let mut changed_bits = 0_u64; + for alternitive in alternitives { + changed_bits |= base ^ alternitive + } + assert_eq!( + core::u64::MAX, + changed_bits, + "Bits changed: {:x} on num: {:?}. base {:x}", + changed_bits, + num, + base + ); +} + +fn test_finish_is_consistent<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + let mut hasher = constructor(1, 2); + "Foo".hash(&mut hasher); + let a = hasher.finish(); + let b = hasher.finish(); + assert_eq!(a, b); +} + +fn test_single_key_bit_flip<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + for bit in 0..128 { + let mut a = constructor(0, 0); + let mut b = constructor(0, 1 << bit); + let mut c = constructor(1 << bit, 0); + "1234".hash(&mut a); + "1234".hash(&mut b); + "1234".hash(&mut c); + assert_sufficiently_different(a.finish(), b.finish(), 2); + assert_sufficiently_different(a.finish(), c.finish(), 2); + assert_sufficiently_different(b.finish(), c.finish(), 2); + let mut a = constructor(0, 0); + let mut b = constructor(0, 1 << bit); + let mut c = constructor(1 << bit, 0); + "12345678".hash(&mut a); + "12345678".hash(&mut b); + "12345678".hash(&mut c); + assert_sufficiently_different(a.finish(), b.finish(), 2); + assert_sufficiently_different(a.finish(), c.finish(), 2); + assert_sufficiently_different(b.finish(), c.finish(), 2); + let mut a = constructor(0, 0); + let mut b = constructor(0, 1 << bit); + let mut c = constructor(1 << bit, 0); + "1234567812345678".hash(&mut a); + "1234567812345678".hash(&mut b); + "1234567812345678".hash(&mut c); + assert_sufficiently_different(a.finish(), b.finish(), 2); + assert_sufficiently_different(a.finish(), c.finish(), 2); + assert_sufficiently_different(b.finish(), c.finish(), 2); + } +} + +fn test_all_bytes_matter<T: Hasher>(hasher: impl Fn() -> T) { + let mut item = vec![0; 256]; + let base_hash = hash(&item, &hasher); + for pos in 0..256 { + item[pos] = 255; + let hash = hash(&item, &hasher); + assert_ne!(base_hash, hash, "Position {} did not affect output", pos); + item[pos] = 0; + } +} + +fn test_no_pair_collisions<T: Hasher>(hasher: impl Fn() -> T) { + let base = [0_u64, 0_u64]; + let base_hash = hash(&base, &hasher); + for bitpos1 in 0..64 { + let a = 1_u64 << bitpos1; + for bitpos2 in 0..bitpos1 { + let b = 1_u64 << bitpos2; + let aa = hash(&[a, a], &hasher); + let ab = hash(&[a, b], &hasher); + let ba = hash(&[b, a], &hasher); + let bb = hash(&[b, b], &hasher); + assert_sufficiently_different(base_hash, aa, 3); + assert_sufficiently_different(base_hash, ab, 3); + assert_sufficiently_different(base_hash, ba, 3); + assert_sufficiently_different(base_hash, bb, 3); + assert_sufficiently_different(aa, ab, 3); + assert_sufficiently_different(ab, ba, 3); + assert_sufficiently_different(ba, bb, 3); + assert_sufficiently_different(aa, ba, 3); + assert_sufficiently_different(ab, bb, 3); + assert_sufficiently_different(aa, bb, 3); + } + } +} + +fn hash<H: Hash, T: Hasher>(b: &H, hash_builder: &dyn Fn() -> T) -> u64 { + let mut hasher = hash_builder(); + b.hash(&mut hasher); + hasher.finish() +} + +fn hash_with<H: Hash, T: Hasher>(b: &H, mut hasher: T) -> u64 { + b.hash(&mut hasher); + hasher.finish() +} + +fn test_single_bit_flip<T: Hasher>(hasher: impl Fn() -> T) { + let size = 32; + let compare_value = hash(&0u32, &hasher); + for pos in 0..size { + let test_value = hash(&(1u32 << pos), &hasher); + assert_sufficiently_different(compare_value, test_value, 2); + } + let size = 64; + let compare_value = hash(&0u64, &hasher); + for pos in 0..size { + let test_value = hash(&(1u64 << pos), &hasher); + assert_sufficiently_different(compare_value, test_value, 2); + } + let size = 128; + let compare_value = hash(&0u128, &hasher); + for pos in 0..size { + let test_value = hash(&(1u128 << pos), &hasher); + dbg!(compare_value, test_value); + assert_sufficiently_different(compare_value, test_value, 2); + } +} + +fn test_padding_doesnot_collide<T: Hasher>(hasher: impl Fn() -> T) { + for c in 0..128u8 { + for string in ["", "\0", "\x01", "1234", "12345678", "1234567812345678"].iter() { + let mut short = hasher(); + string.hash(&mut short); + let value = short.finish(); + let mut padded = string.to_string(); + for num in 1..=128 { + let mut long = hasher(); + padded.push(c as char); + padded.hash(&mut long); + let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish()); + assert!( + same_bytes <= 3, + "{} bytes of {} -> {:x} vs {:x}", + num, + c, + value, + long.finish() + ); + assert!( + same_nibbles <= 8, + "{} bytes of {} -> {:x} vs {:x}", + num, + c, + value, + long.finish() + ); + let flipped_bits = (value ^ long.finish()).count_ones(); + assert!(flipped_bits > 10); + } + if string.len() > 0 { + let mut padded = string[1..].to_string(); + padded.push(c as char); + for num in 2..=128 { + let mut long = hasher(); + padded.push(c as char); + padded.hash(&mut long); + let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish()); + assert!( + same_bytes <= 3, + "string {:?} + {} bytes of {} -> {:x} vs {:x}", + string, + num, + c, + value, + long.finish() + ); + assert!( + same_nibbles <= 8, + "string {:?} + {} bytes of {} -> {:x} vs {:x}", + string, + num, + c, + value, + long.finish() + ); + let flipped_bits = (value ^ long.finish()).count_ones(); + assert!(flipped_bits > 10); + } + } + } + } +} + +fn test_length_extension<T: Hasher>(hasher: impl Fn(u128, u128) -> T) { + for key in 0..256 { + let h1 = hasher(key, key); + let v1 = hash_with(&[0_u8, 0, 0, 0, 0, 0, 0, 0], h1); + let h2 = hasher(key, key); + let v2 = hash_with(&[1_u8, 0, 0, 0, 0, 0, 0, 0, 0], h2); + assert_ne!(v1, v2); + } +} + +#[cfg(test)] +mod fallback_tests { + use crate::fallback_hash::*; + use crate::hash_quality_test::*; + + #[test] + fn fallback_single_bit_flip() { + test_single_bit_flip(|| AHasher::new_with_keys(0, 0)) + } + + #[test] + fn fallback_single_key_bit_flip() { + test_single_key_bit_flip(AHasher::new_with_keys) + } + + #[test] + fn fallback_all_bytes_matter() { + test_all_bytes_matter(|| AHasher::new_with_keys(0, 0)); + } + + #[test] + fn fallback_test_no_pair_collisions() { + test_no_pair_collisions(|| AHasher::new_with_keys(0, 0)); + } + + #[test] + fn fallback_test_no_full_collisions() { + test_no_full_collisions(|| AHasher::new_with_keys(0, 0)); + } + + #[test] + fn fallback_keys_change_output() { + test_keys_change_output(AHasher::new_with_keys); + } + + #[test] + fn fallback_input_affect_every_byte() { + test_input_affect_every_byte(AHasher::new_with_keys); + } + + #[test] + fn fallback_keys_affect_every_byte() { + //For fallback second key is not used in every hash. + #[cfg(all(not(feature = "specialize"), feature = "folded_multiply"))] + test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a)); + test_keys_affect_every_byte("", |a, b| AHasher::new_with_keys(a ^ b, a)); + test_keys_affect_every_byte((0, 0), |a, b| AHasher::new_with_keys(a ^ b, a)); + } + + #[test] + fn fallback_finish_is_consistant() { + test_finish_is_consistent(AHasher::test_with_keys) + } + + #[test] + fn fallback_padding_doesnot_collide() { + test_padding_doesnot_collide(|| AHasher::new_with_keys(0, 0)); + test_padding_doesnot_collide(|| AHasher::new_with_keys(0, 2)); + test_padding_doesnot_collide(|| AHasher::new_with_keys(2, 0)); + test_padding_doesnot_collide(|| AHasher::new_with_keys(2, 2)); + } + + #[test] + fn fallback_length_extension() { + test_length_extension(|a, b| AHasher::new_with_keys(a, b)); + } +} + +///Basic sanity tests of the cypto properties of aHash. +#[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all( + any(target_arch = "arm", target_arch = "aarch64"), + any(target_feature = "aes", target_feature = "crypto"), + not(miri), + feature = "stdsimd" + ) +))] +#[cfg(test)] +mod aes_tests { + use crate::aes_hash::*; + use crate::hash_quality_test::*; + use std::hash::{Hash, Hasher}; + + //This encrypts to 0. + const BAD_KEY2: u128 = 0x6363_6363_6363_6363_6363_6363_6363_6363; + //This decrypts to 0. + const BAD_KEY: u128 = 0x5252_5252_5252_5252_5252_5252_5252_5252; + + #[test] + fn test_single_bit_in_byte() { + let mut hasher1 = AHasher::test_with_keys(0, 0); + 8_u32.hash(&mut hasher1); + let mut hasher2 = AHasher::test_with_keys(0, 0); + 0_u32.hash(&mut hasher2); + assert_sufficiently_different(hasher1.finish(), hasher2.finish(), 1); + } + + #[test] + fn aes_single_bit_flip() { + test_single_bit_flip(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_single_bit_flip(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn aes_single_key_bit_flip() { + test_single_key_bit_flip(AHasher::test_with_keys) + } + + #[test] + fn aes_all_bytes_matter() { + test_all_bytes_matter(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_all_bytes_matter(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn aes_test_no_pair_collisions() { + test_no_pair_collisions(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_no_pair_collisions(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn ase_test_no_full_collisions() { + test_no_full_collisions(|| AHasher::test_with_keys(12345, 67890)); + } + + #[test] + fn aes_keys_change_output() { + test_keys_change_output(AHasher::test_with_keys); + } + + #[test] + fn aes_input_affect_every_byte() { + test_input_affect_every_byte(AHasher::test_with_keys); + } + + #[test] + fn aes_keys_affect_every_byte() { + #[cfg(not(feature = "specialize"))] + test_keys_affect_every_byte(0, AHasher::test_with_keys); + test_keys_affect_every_byte("", AHasher::test_with_keys); + test_keys_affect_every_byte((0, 0), AHasher::test_with_keys); + } + + #[test] + fn aes_finish_is_consistant() { + test_finish_is_consistent(AHasher::test_with_keys) + } + + #[test] + fn aes_padding_doesnot_collide() { + test_padding_doesnot_collide(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_padding_doesnot_collide(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn aes_length_extension() { + test_length_extension(|a, b| AHasher::test_with_keys(a, b)); + } +} diff --git a/src/hash_set.rs b/src/hash_set.rs new file mode 100644 index 0000000..d03bef5 --- /dev/null +++ b/src/hash_set.rs @@ -0,0 +1,352 @@ +use crate::RandomState; +use std::collections::{hash_set, HashSet}; +use std::fmt::{self, Debug}; +use std::hash::{BuildHasher, Hash}; +use std::iter::FromIterator; +use std::ops::{BitAnd, BitOr, BitXor, Deref, DerefMut, Sub}; + +#[cfg(feature = "serde")] +use serde::{ + de::{Deserialize, Deserializer}, + ser::{Serialize, Serializer}, +}; + +/// A [`HashSet`](std::collections::HashSet) using [`RandomState`](crate::RandomState) to hash the items. +/// (Requires the `std` feature to be enabled.) +#[derive(Clone)] +pub struct AHashSet<T, S = RandomState>(HashSet<T, S>); + +impl<T> From<HashSet<T, RandomState>> for AHashSet<T> { + fn from(item: HashSet<T, RandomState>) -> Self { + AHashSet(item) + } +} + +impl<T, const N: usize> From<[T; N]> for AHashSet<T> +where + T: Eq + Hash, +{ + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let set1 = AHashSet::from([1, 2, 3, 4]); + /// let set2: AHashSet<_> = [1, 2, 3, 4].into(); + /// assert_eq!(set1, set2); + /// ``` + fn from(arr: [T; N]) -> Self { + Self::from_iter(arr) + } +} + +impl<T> Into<HashSet<T, RandomState>> for AHashSet<T> { + fn into(self) -> HashSet<T, RandomState> { + self.0 + } +} + +impl<T> AHashSet<T, RandomState> { + /// This crates a hashset using [RandomState::new]. + /// See the documentation in [RandomSource] for notes about key strength. + pub fn new() -> Self { + AHashSet(HashSet::with_hasher(RandomState::new())) + } + + /// This crates a hashset with the specified capacity using [RandomState::new]. + /// See the documentation in [RandomSource] for notes about key strength. + pub fn with_capacity(capacity: usize) -> Self { + AHashSet(HashSet::with_capacity_and_hasher(capacity, RandomState::new())) + } +} + +impl<T, S> AHashSet<T, S> +where + S: BuildHasher, +{ + pub fn with_hasher(hash_builder: S) -> Self { + AHashSet(HashSet::with_hasher(hash_builder)) + } + + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { + AHashSet(HashSet::with_capacity_and_hasher(capacity, hash_builder)) + } +} + +impl<T, S> Deref for AHashSet<T, S> { + type Target = HashSet<T, S>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<T, S> DerefMut for AHashSet<T, S> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<T, S> PartialEq for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + fn eq(&self, other: &AHashSet<T, S>) -> bool { + self.0.eq(&other.0) + } +} + +impl<T, S> Eq for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ +} + +impl<T, S> BitOr<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the union of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a | &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 3, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitor(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.bitor(&rhs.0)) + } +} + +impl<T, S> BitAnd<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the intersection of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![2, 3, 4].into_iter().collect(); + /// + /// let set = &a & &b; + /// + /// let mut i = 0; + /// let expected = [2, 3]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitand(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.bitand(&rhs.0)) + } +} + +impl<T, S> BitXor<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the symmetric difference of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a ^ &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitxor(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.bitxor(&rhs.0)) + } +} + +impl<T, S> Sub<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the difference of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a - &b; + /// + /// let mut i = 0; + /// let expected = [1, 2]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn sub(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.sub(&rhs.0)) + } +} + +impl<T, S> Debug for AHashSet<T, S> +where + T: Debug, + S: BuildHasher, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(fmt) + } +} + +impl<T> FromIterator<T> for AHashSet<T, RandomState> +where + T: Eq + Hash, +{ + /// This crates a hashset from the provided iterator using [RandomState::new]. + /// See the documentation in [RandomSource] for notes about key strength. + #[inline] + fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> AHashSet<T> { + let mut inner = HashSet::with_hasher(RandomState::new()); + inner.extend(iter); + AHashSet(inner) + } +} + +impl<'a, T, S> IntoIterator for &'a AHashSet<T, S> { + type Item = &'a T; + type IntoIter = hash_set::Iter<'a, T>; + fn into_iter(self) -> Self::IntoIter { + (&self.0).iter() + } +} + +impl<T, S> IntoIterator for AHashSet<T, S> { + type Item = T; + type IntoIter = hash_set::IntoIter<T>; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<T, S> Extend<T> for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + #[inline] + fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { + self.0.extend(iter) + } +} + +impl<'a, T, S> Extend<&'a T> for AHashSet<T, S> +where + T: 'a + Eq + Hash + Copy, + S: BuildHasher, +{ + #[inline] + fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) { + self.0.extend(iter) + } +} + +/// NOTE: For safety this trait impl is only available available if either of the flags `runtime-rng` (on by default) or +/// `compile-time-rng` are enabled. This is to prevent weakly keyed maps from being accidentally created. Instead one of +/// constructors for [RandomState] must be used. +#[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] +impl<T> Default for AHashSet<T, RandomState> { + /// Creates an empty `AHashSet<T, S>` with the `Default` value for the hasher. + #[inline] + fn default() -> AHashSet<T, RandomState> { + AHashSet(HashSet::default()) + } +} + +#[cfg(feature = "serde")] +impl<T> Serialize for AHashSet<T> +where + T: Serialize + Eq + Hash, +{ + fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + self.deref().serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, T> Deserialize<'de> for AHashSet<T> +where + T: Deserialize<'de> + Eq + Hash, +{ + fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> { + let hash_set = HashSet::deserialize(deserializer); + hash_set.map(|hash_set| Self(hash_set)) + } + + fn deserialize_in_place<D: Deserializer<'de>>(deserializer: D, place: &mut Self) -> Result<(), D::Error> { + HashSet::deserialize_in_place(deserializer, place) + } +} + +#[cfg(all(test, feature = "serde"))] +mod test { + use super::*; + + #[test] + fn test_serde() { + let mut set = AHashSet::new(); + set.insert("for".to_string()); + set.insert("bar".to_string()); + let mut serialization = serde_json::to_string(&set).unwrap(); + let mut deserialization: AHashSet<String> = serde_json::from_str(&serialization).unwrap(); + assert_eq!(deserialization, set); + + set.insert("baz".to_string()); + serialization = serde_json::to_string(&set).unwrap(); + let mut deserializer = serde_json::Deserializer::from_str(&serialization); + AHashSet::deserialize_in_place(&mut deserializer, &mut deserialization).unwrap(); + assert_eq!(deserialization, set); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..978f424 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,397 @@ +//! AHash is a high performance keyed hash function. +//! +//! It quickly provides a high quality hash where the result is not predictable without knowing the Key. +//! AHash works with `HashMap` to hash keys, but without allowing for the possibility that an malicious user can +//! induce a collision. +//! +//! # How aHash works +//! +//! When it is available aHash uses the hardware AES instructions to provide a keyed hash function. +//! When it is not, aHash falls back on a slightly slower alternative algorithm. +//! +//! Because aHash does not have a fixed standard for its output, it is able to improve over time. +//! But this also means that different computers or computers using different versions of ahash may observe different +//! hash values for the same input. +#![cfg_attr( + all(feature = "std", any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng")), + doc = r##" +# Basic Usage +AHash provides an implementation of the [Hasher] trait. +To construct a HashMap using aHash as its hasher do the following: +``` +use ahash::{AHasher, RandomState}; +use std::collections::HashMap; + +let mut map: HashMap<i32, i32, RandomState> = HashMap::default(); +map.insert(12, 34); +``` + +### Randomness + +The above requires a source of randomness to generate keys for the hashmap. By default this obtained from the OS. +It is also possible to have randomness supplied via the `compile-time-rng` flag, or manually. + +### If randomess is not available + +[AHasher::default()] can be used to hash using fixed keys. This works with +[BuildHasherDefault](std::hash::BuildHasherDefault). For example: + +``` +use std::hash::BuildHasherDefault; +use std::collections::HashMap; +use ahash::AHasher; + +let mut m: HashMap<_, _, BuildHasherDefault<AHasher>> = HashMap::default(); + # m.insert(12, 34); +``` +It is also possible to instantiate [RandomState] directly: + +``` +use ahash::HashMap; +use ahash::RandomState; + +let mut m = HashMap::with_hasher(RandomState::with_seed(42)); + # m.insert(1, 2); +``` +Or for uses besides a hashhmap: +``` +use std::hash::BuildHasher; +use ahash::RandomState; + +let hash_builder = RandomState::with_seed(42); +let hash = hash_builder.hash_one("Some Data"); +``` +There are several constructors for [RandomState] with different ways to supply seeds. + +# Convenience wrappers + +For convenience, both new-type wrappers and type aliases are provided. + +The new type wrappers are called called `AHashMap` and `AHashSet`. +``` +use ahash::AHashMap; + +let mut map: AHashMap<i32, i32> = AHashMap::new(); +map.insert(12, 34); +``` +This avoids the need to type "RandomState". (For convience `From`, `Into`, and `Deref` are provided). + +# Aliases + +For even less typing and better interop with existing libraries (such as rayon) which require a `std::collection::HashMap` , +the type aliases [HashMap], [HashSet] are provided. + +``` +use ahash::{HashMap, HashMapExt}; + +let mut map: HashMap<i32, i32> = HashMap::new(); +map.insert(12, 34); +``` +Note the import of [HashMapExt]. This is needed for the constructor. + +"## +)] +#![deny(clippy::correctness, clippy::complexity, clippy::perf)] +#![allow(clippy::pedantic, clippy::cast_lossless, clippy::unreadable_literal)] +#![cfg_attr(all(not(test), not(feature = "std")), no_std)] +#![cfg_attr(feature = "specialize", feature(min_specialization))] +#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))] +#![cfg_attr(feature = "stdsimd", feature(stdsimd))] + +#[macro_use] +mod convert; + +mod fallback_hash; + +cfg_if::cfg_if! { + if #[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), + any(target_feature = "aes", target_feature = "crypto"), + not(miri), + feature = "stdsimd") + ))] { + mod aes_hash; + pub use crate::aes_hash::AHasher; + } else { + pub use crate::fallback_hash::AHasher; + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + mod hash_map; + mod hash_set; + + pub use crate::hash_map::AHashMap; + pub use crate::hash_set::AHashSet; + + /// [Hasher]: std::hash::Hasher + /// [HashMap]: std::collections::HashMap + /// Type alias for [HashMap]<K, V, ahash::RandomState> + pub type HashMap<K, V> = std::collections::HashMap<K, V, crate::RandomState>; + + /// Type alias for [HashSet]<K, ahash::RandomState> + pub type HashSet<K> = std::collections::HashSet<K, crate::RandomState>; + } +} + +#[cfg(test)] +mod hash_quality_test; + +mod operations; +pub mod random_state; +mod specialize; + +pub use crate::random_state::RandomState; + +use core::hash::BuildHasher; +use core::hash::Hash; +use core::hash::Hasher; + +#[cfg(feature = "std")] +/// A convenience trait that can be used together with the type aliases defined to +/// get access to the `new()` and `with_capacity()` methods for the HashMap type alias. +pub trait HashMapExt { + /// Constructs a new HashMap + fn new() -> Self; + /// Constructs a new HashMap with a given initial capacity + fn with_capacity(capacity: usize) -> Self; +} + +#[cfg(feature = "std")] +/// A convenience trait that can be used together with the type aliases defined to +/// get access to the `new()` and `with_capacity()` methods for the HashSet type aliases. +pub trait HashSetExt { + /// Constructs a new HashSet + fn new() -> Self; + /// Constructs a new HashSet with a given initial capacity + fn with_capacity(capacity: usize) -> Self; +} + +#[cfg(feature = "std")] +impl<K, V, S> HashMapExt for std::collections::HashMap<K, V, S> +where + S: BuildHasher + Default, +{ + fn new() -> Self { + std::collections::HashMap::with_hasher(S::default()) + } + + fn with_capacity(capacity: usize) -> Self { + std::collections::HashMap::with_capacity_and_hasher(capacity, S::default()) + } +} + +#[cfg(feature = "std")] +impl<K, S> HashSetExt for std::collections::HashSet<K, S> +where + S: BuildHasher + Default, +{ + fn new() -> Self { + std::collections::HashSet::with_hasher(S::default()) + } + + fn with_capacity(capacity: usize) -> Self { + std::collections::HashSet::with_capacity_and_hasher(capacity, S::default()) + } +} + +/// Provides a default [Hasher] with fixed keys. +/// This is typically used in conjunction with [BuildHasherDefault] to create +/// [AHasher]s in order to hash the keys of the map. +/// +/// Generally it is preferable to use [RandomState] instead, so that different +/// hashmaps will have different keys. However if fixed keys are desirable this +/// may be used instead. +/// +/// # Example +/// ``` +/// use std::hash::BuildHasherDefault; +/// use ahash::{AHasher, RandomState}; +/// use std::collections::HashMap; +/// +/// let mut map: HashMap<i32, i32, BuildHasherDefault<AHasher>> = HashMap::default(); +/// map.insert(12, 34); +/// ``` +/// +/// [BuildHasherDefault]: std::hash::BuildHasherDefault +/// [Hasher]: std::hash::Hasher +/// [HashMap]: std::collections::HashMap +impl Default for AHasher { + /// Constructs a new [AHasher] with fixed keys. + /// If `std` is enabled these will be generated upon first invocation. + /// Otherwise if the `compile-time-rng`feature is enabled these will be generated at compile time. + /// If neither of these features are available, hardcoded constants will be used. + /// + /// Because the values are fixed, different hashers will all hash elements the same way. + /// This could make hash values predictable, if DOS attacks are a concern. If this behaviour is + /// not required, it may be preferable to use [RandomState] instead. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHasher; + /// use std::hash::Hasher; + /// + /// let mut hasher_1 = AHasher::default(); + /// let mut hasher_2 = AHasher::default(); + /// + /// hasher_1.write_u32(1234); + /// hasher_2.write_u32(1234); + /// + /// assert_eq!(hasher_1.finish(), hasher_2.finish()); + /// ``` + #[inline] + fn default() -> AHasher { + RandomState::with_fixed_keys().build_hasher() + } +} + +/// Used for specialization. (Sealed) +pub(crate) trait BuildHasherExt: BuildHasher { + #[doc(hidden)] + fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64; + + #[doc(hidden)] + fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64; + + #[doc(hidden)] + fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64; +} + +impl<B: BuildHasher> BuildHasherExt for B { + #[inline] + #[cfg(feature = "specialize")] + default fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(not(feature = "specialize"))] + fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(feature = "specialize")] + default fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(not(feature = "specialize"))] + fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(feature = "specialize")] + default fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(not(feature = "specialize"))] + fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } +} + +// #[inline(never)] +// #[doc(hidden)] +// pub fn hash_test(input: &[u8]) -> u64 { +// let a = RandomState::with_seeds(11, 22, 33, 44); +// <[u8]>::get_hash(input, &a) +// } + +#[cfg(feature = "std")] +#[cfg(test)] +mod test { + use crate::convert::Convert; + use crate::specialize::CallHasher; + use crate::*; + use std::collections::HashMap; + use std::hash::Hash; + + #[test] + fn test_ahash_alias_map_construction() { + let mut map = super::HashMap::with_capacity(1234); + map.insert(1, "test"); + } + + #[test] + fn test_ahash_alias_set_construction() { + let mut set = super::HashSet::with_capacity(1234); + set.insert(1); + } + + #[test] + fn test_default_builder() { + use core::hash::BuildHasherDefault; + + let mut map = HashMap::<u32, u64, BuildHasherDefault<AHasher>>::default(); + map.insert(1, 3); + } + + #[test] + fn test_builder() { + let mut map = HashMap::<u32, u64, RandomState>::default(); + map.insert(1, 3); + } + + #[test] + fn test_conversion() { + let input: &[u8] = b"dddddddd"; + let bytes: u64 = as_array!(input, 8).convert(); + assert_eq!(bytes, 0x6464646464646464); + } + + #[test] + fn test_non_zero() { + let mut hasher1 = AHasher::new_with_keys(0, 0); + let mut hasher2 = AHasher::new_with_keys(0, 0); + "foo".hash(&mut hasher1); + "bar".hash(&mut hasher2); + assert_ne!(hasher1.finish(), 0); + assert_ne!(hasher2.finish(), 0); + assert_ne!(hasher1.finish(), hasher2.finish()); + + let mut hasher1 = AHasher::new_with_keys(0, 0); + let mut hasher2 = AHasher::new_with_keys(0, 0); + 3_u64.hash(&mut hasher1); + 4_u64.hash(&mut hasher2); + assert_ne!(hasher1.finish(), 0); + assert_ne!(hasher2.finish(), 0); + assert_ne!(hasher1.finish(), hasher2.finish()); + } + + #[test] + fn test_non_zero_specialized() { + let hasher_build = RandomState::with_seeds(0, 0, 0, 0); + + let h1 = str::get_hash("foo", &hasher_build); + let h2 = str::get_hash("bar", &hasher_build); + assert_ne!(h1, 0); + assert_ne!(h2, 0); + assert_ne!(h1, h2); + + let h1 = u64::get_hash(&3_u64, &hasher_build); + let h2 = u64::get_hash(&4_u64, &hasher_build); + assert_ne!(h1, 0); + assert_ne!(h2, 0); + assert_ne!(h1, h2); + } + + #[test] + fn test_ahasher_construction() { + let _ = AHasher::new_with_keys(1234, 5678); + } +} diff --git a/src/operations.rs b/src/operations.rs new file mode 100644 index 0000000..ffd3b1a --- /dev/null +++ b/src/operations.rs @@ -0,0 +1,383 @@ +use crate::convert::*; + +///This constant comes from Kunth's prng (Empirically it works better than those from splitmix32). +pub(crate) const MULTIPLE: u64 = 6364136223846793005; + +/// This is a constant with a lot of special properties found by automated search. +/// See the unit tests below. (Below are alternative values) +#[cfg(all(target_feature = "ssse3", not(miri)))] +const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128; +//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128; +//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128; + +#[inline(always)] +#[cfg(feature = "folded_multiply")] +pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 { + let result = (s as u128).wrapping_mul(by as u128); + ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64) +} + +#[inline(always)] +#[cfg(not(feature = "folded_multiply"))] +pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 { + let b1 = s.wrapping_mul(by.swap_bytes()); + let b2 = s.swap_bytes().wrapping_mul(!by); + b1 ^ b2.swap_bytes() +} + +/// Given a small (less than 8 byte slice) returns the same data stored in two u32s. +/// (order of and non-duplication of bytes is NOT guaranteed) +#[inline(always)] +pub(crate) fn read_small(data: &[u8]) -> [u64; 2] { + debug_assert!(data.len() <= 8); + if data.len() >= 2 { + if data.len() >= 4 { + //len 4-8 + [data.read_u32().0 as u64, data.read_last_u32() as u64] + } else { + //len 2-3 + [data.read_u16().0 as u64, data[data.len() - 1] as u64] + } + } else { + if data.len() > 0 { + [data[0] as u64, data[0] as u64] + } else { + [0, 0] + } + } +} + +#[inline(always)] +pub(crate) fn shuffle(a: u128) -> u128 { + #[cfg(all(target_feature = "ssse3", not(miri)))] + { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + use core::mem::transmute; + unsafe { transmute(_mm_shuffle_epi8(transmute(a), transmute(SHUFFLE_MASK))) } + } + #[cfg(not(all(target_feature = "ssse3", not(miri))))] + { + a.swap_bytes() + } +} + +#[allow(unused)] //not used by fallback +#[inline(always)] +pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 { + let sum = add_by_64s(a.convert(), b.convert()); + shuffle(sum.convert()) +} + +#[allow(unused)] //not used by fallback +#[inline(always)] +pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 { + let shuffled: [u64; 2] = shuffle(base).convert(); + add_by_64s(shuffled, to_add.convert()).convert() +} + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))] +#[inline(always)] +pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] { + use core::mem::transmute; + unsafe { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + transmute(_mm_add_epi64(transmute(a), transmute(b))) + } +} + +#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))] +#[inline(always)] +pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] { + [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])] +} + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesenc(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(_mm_aesenc_si128(value, transmute(xor))) + } +} + +#[cfg(all( + any(target_arch = "arm", target_arch = "aarch64"), + any(target_feature = "aes", target_feature = "crypto"), + not(miri), + feature = "stdsimd" +))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesenc(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "aarch64")] + use core::arch::aarch64::*; + #[cfg(target_arch = "arm")] + use core::arch::arm::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(vaesmcq_u8(vaeseq_u8(value, transmute(xor)))) + } +} + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesdec(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(_mm_aesdec_si128(value, transmute(xor))) + } +} + +#[cfg(all( + any(target_arch = "arm", target_arch = "aarch64"), + any(target_feature = "aes", target_feature = "crypto"), + not(miri), + feature = "stdsimd" +))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesdec(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "aarch64")] + use core::arch::aarch64::*; + #[cfg(target_arch = "arm")] + use core::arch::arm::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(vaesimcq_u8(vaesdq_u8(value, transmute(xor)))) + } +} + +#[allow(unused)] +#[inline(always)] +pub(crate) fn add_in_length(enc: &mut u128, len: u64) { + #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))] + { + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + + unsafe { + let enc = enc as *mut u128; + let len = _mm_cvtsi64_si128(len as i64); + let data = _mm_loadu_si128(enc.cast()); + let sum = _mm_add_epi64(data, len); + _mm_storeu_si128(enc.cast(), sum); + } + } + #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))] + { + let mut t: [u64; 2] = enc.convert(); + t[0] = t[0].wrapping_add(len); + *enc = t.convert(); + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::convert::Convert; + + // This is code to search for the shuffle constant + // + //thread_local! { static MASK: Cell<u128> = Cell::new(0); } + // + // fn shuffle(a: u128) -> u128 { + // use std::intrinsics::transmute; + // #[cfg(target_arch = "x86")] + // use core::arch::x86::*; + // #[cfg(target_arch = "x86_64")] + // use core::arch::x86_64::*; + // MASK.with(|mask| { + // unsafe { transmute(_mm_shuffle_epi8(transmute(a), transmute(mask.get()))) } + // }) + // } + // + // #[test] + // fn find_shuffle() { + // use rand::prelude::*; + // use SliceRandom; + // use std::panic; + // use std::io::Write; + // + // let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ,13, 14, 15]; + // let mut rand = thread_rng(); + // let mut successful_list = HashMap::new(); + // for _attempt in 0..10000000 { + // rand.shuffle(&mut value); + // let test_val = value.convert(); + // MASK.with(|mask| { + // mask.set(test_val); + // }); + // if let Ok(successful) = panic::catch_unwind(|| { + // test_shuffle_does_not_collide_with_aes(); + // test_shuffle_moves_high_bits(); + // test_shuffle_moves_every_value(); + // //test_shuffle_does_not_loop(); + // value + // }) { + // let successful: u128 = successful.convert(); + // successful_list.insert(successful, iters_before_loop()); + // } + // } + // let write_file = File::create("/tmp/output").unwrap(); + // let mut writer = BufWriter::new(&write_file); + // + // for success in successful_list { + // writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1); + // } + // } + // + // fn iters_before_loop() -> u32 { + // let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF; + // let mut shuffled = shuffle(numbered); + // let mut count = 0; + // loop { + // // println!("{:>16x}", shuffled); + // if numbered == shuffled { + // break; + // } + // count += 1; + // shuffled = shuffle(shuffled); + // } + // count + // } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "ssse3", + target_feature = "aes", + not(miri) + ))] + #[test] + fn test_shuffle_does_not_collide_with_aes() { + let mut value: [u8; 16] = [0; 16]; + let zero_mask_enc = aesenc(0, 0); + let zero_mask_dec = aesdec(0, 0); + for index in 0..16 { + value[index] = 1; + let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert(); + let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert(); + let actual_location: [u8; 16] = shuffle(value.convert()).convert(); + for pos in 0..16 { + if actual_location[pos] != 0 { + assert_eq!( + 0, excluded_positions_enc[pos], + "Forward Overlap between {:?} and {:?} at {}", + excluded_positions_enc, actual_location, index + ); + assert_eq!( + 0, excluded_positions_dec[pos], + "Reverse Overlap between {:?} and {:?} at {}", + excluded_positions_dec, actual_location, index + ); + } + } + value[index] = 0; + } + } + + #[test] + fn test_shuffle_contains_each_value() { + let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert(); + let shuffled: [u8; 16] = shuffle(value.convert()).convert(); + for index in 0..16_u8 { + assert!(shuffled.contains(&index), "Value is missing {}", index); + } + } + + #[test] + fn test_shuffle_moves_every_value() { + let mut value: [u8; 16] = [0; 16]; + for index in 0..16 { + value[index] = 1; + let shuffled: [u8; 16] = shuffle(value.convert()).convert(); + assert_eq!(0, shuffled[index], "Value is not moved {}", index); + value[index] = 0; + } + } + + #[test] + fn test_shuffle_moves_high_bits() { + assert!( + shuffle(1) > (1_u128 << 80), + "Low bits must be moved to other half {:?} -> {:?}", + 0, + shuffle(1) + ); + + assert!( + shuffle(1_u128 << 58) >= (1_u128 << 64), + "High bits must be moved to other half {:?} -> {:?}", + 7, + shuffle(1_u128 << 58) + ); + assert!( + shuffle(1_u128 << 58) < (1_u128 << 112), + "High bits must not remain high {:?} -> {:?}", + 7, + shuffle(1_u128 << 58) + ); + assert!( + shuffle(1_u128 << 64) < (1_u128 << 64), + "Low bits must be moved to other half {:?} -> {:?}", + 8, + shuffle(1_u128 << 64) + ); + assert!( + shuffle(1_u128 << 64) >= (1_u128 << 16), + "Low bits must not remain low {:?} -> {:?}", + 8, + shuffle(1_u128 << 64) + ); + + assert!( + shuffle(1_u128 << 120) < (1_u128 << 50), + "High bits must be moved to low half {:?} -> {:?}", + 15, + shuffle(1_u128 << 120) + ); + } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "ssse3", + not(miri) + ))] + #[test] + fn test_shuffle_does_not_loop() { + let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF; + let mut shuffled = shuffle(numbered); + for count in 0..100 { + // println!("{:>16x}", shuffled); + assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled); + shuffled = shuffle(shuffled); + } + } + + #[test] + fn test_add_length() { + let mut enc = (u64::MAX as u128) << 64 | 50; + add_in_length(&mut enc, u64::MAX); + assert_eq!(enc >> 64, u64::MAX as u128); + assert_eq!(enc as u64, 49); + } +} diff --git a/src/random_state.rs b/src/random_state.rs new file mode 100644 index 0000000..54b754d --- /dev/null +++ b/src/random_state.rs @@ -0,0 +1,529 @@ +use core::hash::Hash; +cfg_if::cfg_if! { + if #[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd") + ))] { + use crate::aes_hash::*; + } else { + use crate::fallback_hash::*; + } +} +cfg_if::cfg_if! { + if #[cfg(feature = "specialize")]{ + use crate::BuildHasherExt; + } +} +cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + extern crate std as alloc; + } else { + extern crate alloc; + } +} + +#[cfg(feature = "atomic-polyfill")] +use atomic_polyfill as atomic; +#[cfg(not(feature = "atomic-polyfill"))] +use core::sync::atomic; + +use alloc::boxed::Box; +use atomic::{AtomicUsize, Ordering}; +use core::any::{Any, TypeId}; +use core::fmt; +use core::hash::BuildHasher; +use core::hash::Hasher; + +pub(crate) const PI: [u64; 4] = [ + 0x243f_6a88_85a3_08d3, + 0x1319_8a2e_0370_7344, + 0xa409_3822_299f_31d0, + 0x082e_fa98_ec4e_6c89, +]; + +pub(crate) const PI2: [u64; 4] = [ + 0x4528_21e6_38d0_1377, + 0xbe54_66cf_34e9_0c6c, + 0xc0ac_29b7_c97c_50dd, + 0x3f84_d5b5_b547_0917, +]; + +cfg_if::cfg_if! { + if #[cfg(all(feature = "compile-time-rng", any(test, fuzzing)))] { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + use const_random::const_random; + + const RAND: [[u64; 4]; 2] = [ + [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ], [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ] + ]; + &RAND + } + } else if #[cfg(all(feature = "runtime-rng", not(fuzzing)))] { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + use crate::convert::Convert; + + static SEEDS: OnceBox<[[u64; 4]; 2]> = OnceBox::new(); + + SEEDS.get_or_init(|| { + let mut result: [u8; 64] = [0; 64]; + getrandom::getrandom(&mut result).expect("getrandom::getrandom() failed."); + Box::new(result.convert()) + }) + } + } else if #[cfg(feature = "compile-time-rng")] { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + use const_random::const_random; + + const RAND: [[u64; 4]; 2] = [ + [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ], [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ] + ]; + &RAND + } + } else { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + &[PI, PI2] + } + } +} + +cfg_if::cfg_if! { + if #[cfg(not(all(target_arch = "arm", target_os = "none")))] { + use once_cell::race::OnceBox; + + static RAND_SOURCE: OnceBox<Box<dyn RandomSource + Send + Sync>> = OnceBox::new(); + } +} +/// A supplier of Randomness used for different hashers. +/// See [set_random_source]. +/// +/// If [set_random_source] aHash will default to the best available source of randomness. +/// In order this is: +/// 1. OS provided random number generator (available if the `runtime-rng` flag is enabled which it is by default) - This should be very strong. +/// 2. Strong compile time random numbers used to permute a static "counter". (available if `compile-time-rng` is enabled. +/// __Enabling this is recommended if `runtime-rng` is not possible__) +/// 3. A static counter that adds the memory address of each [RandomState] created permuted with fixed constants. +/// (Similar to above but with fixed keys) - This is the weakest option. The strength of this heavily depends on whether or not ASLR is enabled. +/// (Rust enables ASLR by default) +pub trait RandomSource { + fn gen_hasher_seed(&self) -> usize; +} + +struct DefaultRandomSource { + counter: AtomicUsize, +} + +impl DefaultRandomSource { + fn new() -> DefaultRandomSource { + DefaultRandomSource { + counter: AtomicUsize::new(&PI as *const _ as usize), + } + } + + #[cfg(all(target_arch = "arm", target_os = "none"))] + const fn default() -> DefaultRandomSource { + DefaultRandomSource { + counter: AtomicUsize::new(PI[3] as usize), + } + } +} + +impl RandomSource for DefaultRandomSource { + cfg_if::cfg_if! { + if #[cfg(all(target_arch = "arm", target_os = "none"))] { + fn gen_hasher_seed(&self) -> usize { + let stack = self as *const _ as usize; + let previous = self.counter.load(Ordering::Relaxed); + let new = previous.wrapping_add(stack); + self.counter.store(new, Ordering::Relaxed); + new + } + } else { + fn gen_hasher_seed(&self) -> usize { + let stack = self as *const _ as usize; + self.counter.fetch_add(stack, Ordering::Relaxed) + } + } + } +} + +cfg_if::cfg_if! { + if #[cfg(all(target_arch = "arm", target_os = "none"))] { + #[inline] + fn get_src() -> &'static dyn RandomSource { + static RAND_SOURCE: DefaultRandomSource = DefaultRandomSource::default(); + &RAND_SOURCE + } + } else { + /// Provides an optional way to manually supply a source of randomness for Hasher keys. + /// + /// The provided [RandomSource] will be used to be used as a source of randomness by [RandomState] to generate new states. + /// If this method is not invoked the standard source of randomness is used as described in the Readme. + /// + /// The source of randomness can only be set once, and must be set before the first RandomState is created. + /// If the source has already been specified `Err` is returned with a `bool` indicating if the set failed because + /// method was previously invoked (true) or if the default source is already being used (false). + #[cfg(not(all(target_arch = "arm", target_os = "none")))] + pub fn set_random_source(source: impl RandomSource + Send + Sync + 'static) -> Result<(), bool> { + RAND_SOURCE.set(Box::new(Box::new(source))).map_err(|s| s.as_ref().type_id() != TypeId::of::<&DefaultRandomSource>()) + } + + #[inline] + fn get_src() -> &'static dyn RandomSource { + RAND_SOURCE.get_or_init(|| Box::new(Box::new(DefaultRandomSource::new()))).as_ref() + } + } +} + +/// Provides a [Hasher] factory. This is typically used (e.g. by [HashMap]) to create +/// [AHasher]s in order to hash the keys of the map. See `build_hasher` below. +/// +/// [build_hasher]: ahash:: +/// [Hasher]: std::hash::Hasher +/// [BuildHasher]: std::hash::BuildHasher +/// [HashMap]: std::collections::HashMap +/// +/// There are multiple constructors each is documented in more detail below: +/// +/// | Constructor | Dynamically random? | Seed | +/// |---------------|---------------------|------| +/// |`new` | Each instance unique|_[RandomSource]_| +/// |`generate_with`| Each instance unique|`u64` x 4 + [RandomSource]| +/// |`with_seed` | Fixed per process |`u64` + static random number| +/// |`with_seeds` | Fixed |`u64` x 4| +/// +#[derive(Clone)] +pub struct RandomState { + pub(crate) k0: u64, + pub(crate) k1: u64, + pub(crate) k2: u64, + pub(crate) k3: u64, +} + +impl fmt::Debug for RandomState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad("RandomState { .. }") + } +} + +impl RandomState { + + /// Create a new `RandomState` `BuildHasher` using random keys. + /// + /// Each instance will have a unique set of keys derived from [RandomSource]. + /// + #[inline] + pub fn new() -> RandomState { + let src = get_src(); + let fixed = get_fixed_seeds(); + Self::from_keys(&fixed[0], &fixed[1], src.gen_hasher_seed()) + } + + /// Create a new `RandomState` `BuildHasher` based on the provided seeds, but in such a way + /// that each time it is called the resulting state will be different and of high quality. + /// This allows fixed constant or poor quality seeds to be provided without the problem of different + /// `BuildHasher`s being identical or weak. + /// + /// This is done via permuting the provided values with the value of a static counter and memory address. + /// (This makes this method somewhat more expensive than `with_seeds` below which does not do this). + /// + /// The provided values (k0-k3) do not need to be of high quality but they should not all be the same value. + #[inline] + pub fn generate_with(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { + let src = get_src(); + let fixed = get_fixed_seeds(); + RandomState::from_keys(&fixed[0], &[k0, k1, k2, k3], src.gen_hasher_seed()) + } + + fn from_keys(a: &[u64; 4], b: &[u64; 4], c: usize) -> RandomState { + let &[k0, k1, k2, k3] = a; + let mut hasher = AHasher::from_random_state(&RandomState { k0, k1, k2, k3 }); + hasher.write_usize(c); + let mix = |l: u64, r: u64| { + let mut h = hasher.clone(); + h.write_u64(l); + h.write_u64(r); + h.finish() + }; + RandomState { + k0: mix(b[0], b[2]), + k1: mix(b[1], b[3]), + k2: mix(b[2], b[1]), + k3: mix(b[3], b[0]), + } + } + + /// Internal. Used by Default. + #[inline] + pub(crate) fn with_fixed_keys() -> RandomState { + let [k0, k1, k2, k3] = get_fixed_seeds()[0]; + RandomState { k0, k1, k2, k3 } + } + + /// Build a `RandomState` from a single key. The provided key does not need to be of high quality, + /// but all `RandomState`s created from the same key will produce identical hashers. + /// (In contrast to `generate_with` above) + /// + /// This allows for explicitly setting the seed to be used. + /// + /// Note: This method does not require the provided seed to be strong. + #[inline] + pub fn with_seed(key: usize) -> RandomState { + let fixed = get_fixed_seeds(); + RandomState::from_keys(&fixed[0], &fixed[1], key) + } + + /// Allows for explicitly setting the seeds to used. + /// All `RandomState`s created with the same set of keys key will produce identical hashers. + /// (In contrast to `generate_with` above) + /// + /// Note: If DOS resistance is desired one of these should be a decent quality random number. + /// If 4 high quality random number are not cheaply available this method is robust against 0s being passed for + /// one or more of the parameters or the same value being passed for more than one parameter. + /// It is recommended to pass numbers in order from highest to lowest quality (if there is any difference). + #[inline] + pub const fn with_seeds(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { + RandomState { + k0: k0 ^ PI2[0], + k1: k1 ^ PI2[1], + k2: k2 ^ PI2[2], + k3: k3 ^ PI2[3], + } + } + + /// Calculates the hash of a single value. This provides a more convenient (and faster) way to obtain a hash: + /// For example: + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use std::hash::BuildHasher; + use ahash::RandomState; + + let hash_builder = RandomState::new(); + let hash = hash_builder.hash_one("Some Data"); +``` + "## + )] + /// This is similar to: + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use std::hash::{BuildHasher, Hash, Hasher}; + use ahash::RandomState; + + let hash_builder = RandomState::new(); + let mut hasher = hash_builder.build_hasher(); + "Some Data".hash(&mut hasher); + let hash = hasher.finish(); +``` + "## + )] + /// (Note that these two ways to get a hash may not produce the same value for the same data) + /// + /// This is intended as a convenience for code which *consumes* hashes, such + /// as the implementation of a hash table or in unit tests that check + /// whether a custom [`Hash`] implementation behaves as expected. + /// + /// This must not be used in any code which *creates* hashes, such as in an + /// implementation of [`Hash`]. The way to create a combined hash of + /// multiple values is to call [`Hash::hash`] multiple times using the same + /// [`Hasher`], not to call this method repeatedly and combine the results. + #[inline] + pub fn hash_one<T: Hash>(&self, x: T) -> u64 + where + Self: Sized, + { + use crate::specialize::CallHasher; + T::get_hash(&x, self) + } +} + +/// Creates an instance of RandomState using keys obtained from the random number generator. +/// Each instance created in this way will have a unique set of keys. (But the resulting instance +/// can be used to create many hashers each or which will have the same keys.) +/// +/// This is the same as [RandomState::new()] +/// +/// NOTE: For safety this trait impl is only available available if either of the flags `runtime-rng` (on by default) or +/// `compile-time-rng` are enabled. This is to prevent weakly keyed maps from being accidentally created. Instead one of +/// constructors for [RandomState] must be used. +#[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] +impl Default for RandomState { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl BuildHasher for RandomState { + type Hasher = AHasher; + + /// Constructs a new [AHasher] with keys based on this [RandomState] object. + /// This means that two different [RandomState]s will will generate + /// [AHasher]s that will return different hashcodes, but [Hasher]s created from the same [BuildHasher] + /// will generate the same hashes for the same input data. + /// + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use ahash::{AHasher, RandomState}; + use std::hash::{Hasher, BuildHasher}; + + let build_hasher = RandomState::new(); + let mut hasher_1 = build_hasher.build_hasher(); + let mut hasher_2 = build_hasher.build_hasher(); + + hasher_1.write_u32(1234); + hasher_2.write_u32(1234); + + assert_eq!(hasher_1.finish(), hasher_2.finish()); + + let other_build_hasher = RandomState::new(); + let mut different_hasher = other_build_hasher.build_hasher(); + different_hasher.write_u32(1234); + assert_ne!(different_hasher.finish(), hasher_1.finish()); +``` + "## + )] + /// [Hasher]: std::hash::Hasher + /// [BuildHasher]: std::hash::BuildHasher + /// [HashMap]: std::collections::HashMap + #[inline] + fn build_hasher(&self) -> AHasher { + AHasher::from_random_state(self) + } + + + /// Calculates the hash of a single value. This provides a more convenient (and faster) way to obtain a hash: + /// For example: + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use std::hash::BuildHasher; + use ahash::RandomState; + + let hash_builder = RandomState::new(); + let hash = hash_builder.hash_one("Some Data"); +``` + "## + )] + /// This is similar to: + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use std::hash::{BuildHasher, Hash, Hasher}; + use ahash::RandomState; + + let hash_builder = RandomState::new(); + let mut hasher = hash_builder.build_hasher(); + "Some Data".hash(&mut hasher); + let hash = hasher.finish(); +``` + "## + )] + /// (Note that these two ways to get a hash may not produce the same value for the same data) + /// + /// This is intended as a convenience for code which *consumes* hashes, such + /// as the implementation of a hash table or in unit tests that check + /// whether a custom [`Hash`] implementation behaves as expected. + /// + /// This must not be used in any code which *creates* hashes, such as in an + /// implementation of [`Hash`]. The way to create a combined hash of + /// multiple values is to call [`Hash::hash`] multiple times using the same + /// [`Hasher`], not to call this method repeatedly and combine the results. + #[cfg(feature = "specialize")] + #[inline] + fn hash_one<T: Hash>(&self, x: T) -> u64 { + RandomState::hash_one(self, x) + } +} + +#[cfg(feature = "specialize")] +impl BuildHasherExt for RandomState { + #[inline] + fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = AHasherU64 { + buffer: self.k0, + pad: self.k1, + }; + value.hash(&mut hasher); + hasher.finish() + } + + #[inline] + fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = AHasherFixed(self.build_hasher()); + value.hash(&mut hasher); + hasher.finish() + } + + #[inline] + fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = AHasherStr(self.build_hasher()); + value.hash(&mut hasher); + hasher.finish() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_unique() { + let a = RandomState::generate_with(1, 2, 3, 4); + let b = RandomState::generate_with(1, 2, 3, 4); + assert_ne!(a.build_hasher().finish(), b.build_hasher().finish()); + } + + #[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] + #[test] + fn test_not_pi() { + assert_ne!(PI, get_fixed_seeds()[0]); + } + + #[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] + #[test] + fn test_not_pi_const() { + assert_ne!(PI, get_fixed_seeds()[0]); + } + + #[cfg(all(not(feature = "runtime-rng"), not(feature = "compile-time-rng")))] + #[test] + fn test_pi() { + assert_eq!(PI, get_fixed_seeds()[0]); + } + + #[test] + fn test_with_seeds_const() { + const _CONST_RANDOM_STATE: RandomState = RandomState::with_seeds(17, 19, 21, 23); + } +} diff --git a/src/specialize.rs b/src/specialize.rs new file mode 100644 index 0000000..05d335b --- /dev/null +++ b/src/specialize.rs @@ -0,0 +1,218 @@ +use core::hash::BuildHasher; +use core::hash::Hash; +use core::hash::Hasher; + +#[cfg(not(feature = "std"))] +extern crate alloc; +#[cfg(feature = "std")] +extern crate std as alloc; + +#[cfg(feature = "specialize")] +use crate::BuildHasherExt; +#[cfg(feature = "specialize")] +use alloc::string::String; +#[cfg(feature = "specialize")] +use alloc::vec::Vec; + +/// Provides a way to get an optimized hasher for a given data type. +/// Rather than using a Hasher generically which can hash any value, this provides a way to get a specialized hash +/// for a specific type. So this may be faster for primitive types. +pub(crate) trait CallHasher { + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64; +} + +#[cfg(not(feature = "specialize"))] +impl<T> CallHasher for T +where + T: Hash + ?Sized, +{ + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + let mut hasher = build_hasher.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } +} + +#[cfg(feature = "specialize")] +impl<T> CallHasher for T +where + T: Hash + ?Sized, +{ + #[inline] + default fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + let mut hasher = build_hasher.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } +} + +macro_rules! call_hasher_impl { + ($typ:ty) => { + #[cfg(feature = "specialize")] + impl CallHasher for $typ { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_u64(value) + } + } + }; +} +call_hasher_impl!(u8); +call_hasher_impl!(u16); +call_hasher_impl!(u32); +call_hasher_impl!(u64); +call_hasher_impl!(i8); +call_hasher_impl!(i16); +call_hasher_impl!(i32); +call_hasher_impl!(i64); + +#[cfg(feature = "specialize")] +impl CallHasher for u128 { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for i128 { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for usize { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for isize { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for [u8] { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for Vec<u8> { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for str { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(all(feature = "specialize"))] +impl CallHasher for String { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::*; + + #[test] + #[cfg(feature = "specialize")] + pub fn test_specialized_invoked() { + let build_hasher = RandomState::with_seeds(1, 2, 3, 4); + let shortened = u64::get_hash(&0, &build_hasher); + let mut hasher = AHasher::new_with_keys(1, 2); + 0_u64.hash(&mut hasher); + assert_ne!(hasher.finish(), shortened); + } + + /// Tests that some non-trivial transformation takes place. + #[test] + pub fn test_input_processed() { + let build_hasher = RandomState::with_seeds(2, 2, 2, 2); + assert_ne!(0, u64::get_hash(&0, &build_hasher)); + assert_ne!(1, u64::get_hash(&0, &build_hasher)); + assert_ne!(2, u64::get_hash(&0, &build_hasher)); + assert_ne!(3, u64::get_hash(&0, &build_hasher)); + assert_ne!(4, u64::get_hash(&0, &build_hasher)); + assert_ne!(5, u64::get_hash(&0, &build_hasher)); + + assert_ne!(0, u64::get_hash(&1, &build_hasher)); + assert_ne!(1, u64::get_hash(&1, &build_hasher)); + assert_ne!(2, u64::get_hash(&1, &build_hasher)); + assert_ne!(3, u64::get_hash(&1, &build_hasher)); + assert_ne!(4, u64::get_hash(&1, &build_hasher)); + assert_ne!(5, u64::get_hash(&1, &build_hasher)); + + let xored = u64::get_hash(&0, &build_hasher) ^ u64::get_hash(&1, &build_hasher); + assert_ne!(0, xored); + assert_ne!(1, xored); + assert_ne!(2, xored); + assert_ne!(3, xored); + assert_ne!(4, xored); + assert_ne!(5, xored); + } + + #[test] + pub fn test_ref_independent() { + let build_hasher = RandomState::with_seeds(1, 2, 3, 4); + assert_eq!(u8::get_hash(&&1, &build_hasher), u8::get_hash(&1, &build_hasher)); + assert_eq!(u16::get_hash(&&2, &build_hasher), u16::get_hash(&2, &build_hasher)); + assert_eq!(u32::get_hash(&&3, &build_hasher), u32::get_hash(&3, &build_hasher)); + assert_eq!(u64::get_hash(&&4, &build_hasher), u64::get_hash(&4, &build_hasher)); + assert_eq!(u128::get_hash(&&5, &build_hasher), u128::get_hash(&5, &build_hasher)); + assert_eq!( + str::get_hash(&"test", &build_hasher), + str::get_hash("test", &build_hasher) + ); + assert_eq!( + str::get_hash(&"test", &build_hasher), + String::get_hash(&"test".to_string(), &build_hasher) + ); + #[cfg(feature = "specialize")] + assert_eq!( + str::get_hash(&"test", &build_hasher), + <[u8]>::get_hash("test".as_bytes(), &build_hasher) + ); + + let build_hasher = RandomState::with_seeds(10, 20, 30, 40); + assert_eq!(u8::get_hash(&&&1, &build_hasher), u8::get_hash(&1, &build_hasher)); + assert_eq!(u16::get_hash(&&&2, &build_hasher), u16::get_hash(&2, &build_hasher)); + assert_eq!(u32::get_hash(&&&3, &build_hasher), u32::get_hash(&3, &build_hasher)); + assert_eq!(u64::get_hash(&&&4, &build_hasher), u64::get_hash(&4, &build_hasher)); + assert_eq!(u128::get_hash(&&&5, &build_hasher), u128::get_hash(&5, &build_hasher)); + assert_eq!( + str::get_hash(&&"test", &build_hasher), + str::get_hash("test", &build_hasher) + ); + assert_eq!( + str::get_hash(&&"test", &build_hasher), + String::get_hash(&"test".to_string(), &build_hasher) + ); + #[cfg(feature = "specialize")] + assert_eq!( + str::get_hash(&&"test", &build_hasher), + <[u8]>::get_hash(&"test".to_string().into_bytes(), &build_hasher) + ); + } +} diff --git a/tests/bench.rs b/tests/bench.rs new file mode 100644 index 0000000..5bc0fc9 --- /dev/null +++ b/tests/bench.rs @@ -0,0 +1,198 @@ +#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))] + +use ahash::{AHasher, RandomState}; +use criterion::*; +use fxhash::FxHasher; +use rand::Rng; +use std::collections::hash_map::DefaultHasher; +use std::hash::{BuildHasherDefault, Hash, Hasher}; + +// Needs to be in sync with `src/lib.rs` +const AHASH_IMPL: &str = if cfg!(any( + all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "aes", + not(miri), + ), + all( + any(target_arch = "arm", target_arch = "aarch64"), + any(target_feature = "aes", target_feature = "crypto"), + not(miri), + feature = "stdsimd", + ), +)) { + "aeshash" +} else { + "fallbackhash" +}; + +fn ahash<H: Hash>(b: &H) -> u64 { + let build_hasher = RandomState::with_seeds(1, 2, 3, 4); + build_hasher.hash_one(b) +} + +fn fnvhash<H: Hash>(b: &H) -> u64 { + let mut hasher = fnv::FnvHasher::default(); + b.hash(&mut hasher); + hasher.finish() +} + +fn siphash<H: Hash>(b: &H) -> u64 { + let mut hasher = DefaultHasher::default(); + b.hash(&mut hasher); + hasher.finish() +} + +fn fxhash<H: Hash>(b: &H) -> u64 { + let mut hasher = FxHasher::default(); + b.hash(&mut hasher); + hasher.finish() +} + +fn seahash<H: Hash>(b: &H) -> u64 { + let mut hasher = seahash::SeaHasher::default(); + b.hash(&mut hasher); + hasher.finish() +} + +const STRING_LENGTHS: [u32; 12] = [1, 3, 4, 7, 8, 15, 16, 24, 33, 68, 132, 1024]; + +fn gen_strings() -> Vec<String> { + STRING_LENGTHS + .iter() + .map(|len| { + let mut string = String::default(); + for pos in 1..=*len { + let c = (48 + (pos % 10) as u8) as char; + string.push(c); + } + string + }) + .collect() +} + +macro_rules! bench_inputs { + ($group:ident, $hash:ident) => { + // Number of iterations per batch should be high enough to hide timing overhead. + let size = BatchSize::NumIterations(2_000); + + let mut rng = rand::thread_rng(); + $group.bench_function("u8", |b| b.iter_batched(|| rng.gen::<u8>(), |v| $hash(&v), size)); + $group.bench_function("u16", |b| b.iter_batched(|| rng.gen::<u16>(), |v| $hash(&v), size)); + $group.bench_function("u32", |b| b.iter_batched(|| rng.gen::<u32>(), |v| $hash(&v), size)); + $group.bench_function("u64", |b| b.iter_batched(|| rng.gen::<u64>(), |v| $hash(&v), size)); + $group.bench_function("u128", |b| b.iter_batched(|| rng.gen::<u128>(), |v| $hash(&v), size)); + $group.bench_with_input("strings", &gen_strings(), |b, s| b.iter(|| $hash(black_box(s)))); + }; +} + +fn bench_ahash(c: &mut Criterion) { + let mut group = c.benchmark_group(AHASH_IMPL); + bench_inputs!(group, ahash); +} + +fn bench_fx(c: &mut Criterion) { + let mut group = c.benchmark_group("fx"); + bench_inputs!(group, fxhash); +} + +fn bench_fnv(c: &mut Criterion) { + let mut group = c.benchmark_group("fnv"); + bench_inputs!(group, fnvhash); +} + +fn bench_sea(c: &mut Criterion) { + let mut group = c.benchmark_group("sea"); + bench_inputs!(group, seahash); +} + +fn bench_sip(c: &mut Criterion) { + let mut group = c.benchmark_group("sip"); + bench_inputs!(group, siphash); +} + +fn bench_map(c: &mut Criterion) { + #[cfg(feature = "std")] + { + let mut group = c.benchmark_group("map"); + group.bench_function("aHash-alias", |b| { + b.iter(|| { + let hm: ahash::HashMap<i32, i32> = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + }) + }); + group.bench_function("aHash-hashBrown", |b| { + b.iter(|| { + let hm: hashbrown::HashMap<i32, i32> = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + }) + }); + group.bench_function("aHash-hashBrown-explicit", |b| { + b.iter(|| { + let hm: hashbrown::HashMap<i32, i32, RandomState> = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + }) + }); + group.bench_function("aHash-wrapper", |b| { + b.iter(|| { + let hm: ahash::AHashMap<i32, i32> = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + }) + }); + group.bench_function("aHash-rand", |b| { + b.iter(|| { + let hm: std::collections::HashMap<i32, i32, RandomState> = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + }) + }); + group.bench_function("aHash-default", |b| { + b.iter(|| { + let hm: std::collections::HashMap<i32, i32, BuildHasherDefault<AHasher>> = + (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + }) + }); + } +} + +criterion_main!(benches); + +criterion_group!( + benches, + bench_ahash, + bench_fx, + bench_fnv, + bench_sea, + bench_sip, + bench_map +); diff --git a/tests/map_tests.rs b/tests/map_tests.rs new file mode 100644 index 0000000..8d798a0 --- /dev/null +++ b/tests/map_tests.rs @@ -0,0 +1,234 @@ +#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))] + +use std::hash::{BuildHasher, Hash, Hasher}; + +use ahash::RandomState; +use criterion::*; +use fxhash::FxHasher; + +fn gen_word_pairs() -> Vec<String> { + let words: Vec<_> = r#" +a, ability, able, about, above, accept, according, account, across, act, action, +activity, actually, add, address, administration, admit, adult, affect, after, +again, against, age, agency, agent, ago, agree, agreement, ahead, air, all, +allow, almost, alone, along, already, also, although, always, American, among, +amount, analysis, and, animal, another, answer, any, anyone, anything, appear, +apply, approach, area, argue, arm, around, arrive, art, article, artist, as, +ask, assume, at, attack, attention, attorney, audience, author, authority, +available, avoid, away, baby, back, bad, bag, ball, bank, bar, base, be, beat, +beautiful, because, become, bed, before, begin, behavior, behind, believe, +benefit, best, better, between, beyond, big, bill, billion, bit, black, blood, +blue, board, body, book, born, both, box, boy, break, bring, brother, budget, +build, building, business, but, buy, by, call, camera, campaign, can, cancer, +candidate, capital, car, card, care, career, carry, case, catch, cause, cell, +center, central, century, certain, certainly, chair, challenge, chance, change, +character, charge, check, child, choice, choose, church, citizen, city, civil, +claim, class, clear, clearly, close, coach, cold, collection, college, color, +come, commercial, common, community, company, compare, computer, concern, +condition, conference, Congress, consider, consumer, contain, continue, control, +cost, could, country, couple, course, court, cover, create, crime, cultural, +culture, cup, current, customer, cut, dark, data, daughter, day, dead, deal, +death, debate, decade, decide, decision, deep, defense, degree, Democrat, +democratic, describe, design, despite, detail, determine, develop, development, +die, difference, different, difficult, dinner, direction, director, discover, +discuss, discussion, disease, do, doctor, dog, door, down, draw, dream, drive, +drop, drug, during, each, early, east, easy, eat, economic, economy, edge, +education, effect, effort, eight, either, election, else, employee, end, energy, +enjoy, enough, enter, entire, environment, environmental, especially, establish, +even, evening, event, ever, every, everybody, everyone, everything, evidence, +exactly, example, executive, exist, expect, experience, expert, explain, eye, +face, fact, factor, fail, fall, family, far, fast, father, fear, federal, feel, +feeling, few, field, fight, figure, fill, film, final, finally, financial, find, +fine, finger, finish, fire, firm, first, fish, five, floor, fly, focus, follow, +food, foot, for, force, foreign, forget, form, former, forward, four, free, +friend, from, front, full, fund, future, game, garden, gas, general, generation, +get, girl, give, glass, go, goal, good, government, great, green, ground, group, +grow, growth, guess, gun, guy, hair, half, hand, hang, happen, happy, hard, +have, he, head, health, hear, heart, heat, heavy, help, her, here, herself, +high, him, himself, his, history, hit, hold, home, hope, hospital, hot, hotel, +hour, house, how, however, huge, human, hundred, husband, I, idea, identify, if, +image, imagine, impact, important, improve, in, include, including, increase, +indeed, indicate, individual, industry, information, inside, instead, +institution, interest, interesting, international, interview, into, investment, +involve, issue, it, item, its, itself, job, join, just, keep, key, kid, kill, +kind, kitchen, know, knowledge, land, language, large, last, late, later, laugh, +law, lawyer, lay, lead, leader, learn, least, leave, left, leg, legal, less, +let, letter, level, lie, life, light, like, likely, line, list, listen, little, +live, local, long, look, lose, loss, lot, love, low, machine, magazine, main, +maintain, major, majority, make, man, manage, management, manager, many, market, +marriage, material, matter, may, maybe, me, mean, measure, media, medical, meet, +meeting, member, memory, mention, message, method, middle, might, military, +million, mind, minute, miss, mission, model, modern, moment, money, month, more, +morning, most, mother, mouth, move, movement, movie, Mr, Mrs, much, music, must, +my, myself, name, nation, national, natural, nature, near, nearly, necessary, +need, network, never, new, news, newspaper, next, nice, night, no, none, nor, +north, not, note, nothing, notice, now, n't, number, occur, of, off, offer, +office, officer, official, often, oh, oil, ok, old, on, once, one, only, onto, +open, operation, opportunity, option, or, order, organization, other, others, +our, out, outside, over, own, owner, page, pain, painting, paper, parent, part, +participant, particular, particularly, partner, party, pass, past, patient, +pattern, pay, peace, people, per, perform, performance, perhaps, period, person, +personal, phone, physical, pick, picture, piece, place, plan, plant, play, +player, PM, point, police, policy, political, politics, poor, popular, +population, position, positive, possible, power, practice, prepare, present, +president, pressure, pretty, prevent, price, private, probably, problem, +process, produce, product, production, professional, professor, program, +project, property, protect, prove, provide, public, pull, purpose, push, put, +quality, question, quickly, quite, race, radio, raise, range, rate, rather, +reach, read, ready, real, reality, realize, really, reason, receive, recent, +recently, recognize, record, red, reduce, reflect, region, relate, relationship, +religious, remain, remember, remove, report, represent, Republican, require, +research, resource, respond, response, responsibility, rest, result, return, +reveal, rich, right, rise, risk, road, rock, role, room, rule, run, safe, same, +save, say, scene, school, science, scientist, score, sea, season, seat, second, +section, security, see, seek, seem, sell, send, senior, sense, series, serious, +serve, service, set, seven, several, sex, sexual, shake, share, she, shoot, +short, shot, should, shoulder, show, side, sign, significant, similar, simple, +simply, since, sing, single, sister, sit, site, situation, six, size, skill, +skin, small, smile, so, social, society, soldier, some, somebody, someone, +something, sometimes, son, song, soon, sort, sound, source, south, southern, +space, speak, special, specific, speech, spend, sport, spring, staff, stage, +stand, standard, star, start, state, statement, station, stay, step, still, +stock, stop, store, story, strategy, street, strong, structure, student, study, +stuff, style, subject, success, successful, such, suddenly, suffer, suggest, +summer, support, sure, surface, system, table, take, talk, task, tax, teach, +teacher, team, technology, television, tell, ten, tend, term, test, than, thank, +that, the, their, them, themselves, then, theory, there, these, they, thing, +think, third, this, those, though, thought, thousand, threat, three, through, +throughout, throw, thus, time, to, today, together, tonight, too, top, total, +tough, toward, town, trade, traditional, training, travel, treat, treatment, +tree, trial, trip, trouble, true, truth, try, turn, TV, two, type, under, +understand, unit, until, up, upon, us, use, usually, value, various, very, +victim, view, violence, visit, voice, vote, wait, walk, wall, want, war, watch, +water, way, we, weapon, wear, week, weight, well, west, western, what, whatever, +when, where, whether, which, while, white, who, whole, whom, whose, why, wide, +wife, will, win, wind, window, wish, with, within, without, woman, wonder, word, +work, worker, world, worry, would, write, writer, wrong, yard, yeah, year, yes, +yet, you, young, your, yourself"# + .split(',') + .map(|word| word.trim()) + .collect(); + + let mut word_pairs: Vec<_> = Vec::new(); + for word in &words { + for other_word in &words { + word_pairs.push(word.to_string() + " " + other_word); + } + } + assert_eq!(1_000_000, word_pairs.len()); + word_pairs +} + +#[allow(unused)] // False positive +fn test_hash_common_words<B: BuildHasher>(build_hasher: &B) { + let word_pairs: Vec<_> = gen_word_pairs(); + check_for_collisions(build_hasher, &word_pairs, 32); +} + +#[allow(unused)] // False positive +fn check_for_collisions<H: Hash, B: BuildHasher>(build_hasher: &B, items: &[H], bucket_count: usize) { + let mut buckets = vec![0; bucket_count]; + for item in items { + let value = hash(item, build_hasher) as usize; + buckets[value % bucket_count] += 1; + } + let mean = items.len() / bucket_count; + let max = *buckets.iter().max().unwrap(); + let min = *buckets.iter().min().unwrap(); + assert!( + (min as f64) > (mean as f64) * 0.95, + "min: {}, max:{}, {:?}", + min, + max, + buckets + ); + assert!( + (max as f64) < (mean as f64) * 1.05, + "min: {}, max:{}, {:?}", + min, + max, + buckets + ); +} + +#[cfg(feature = "specialize")] +#[allow(unused)] // False positive +fn hash<H: Hash, B: BuildHasher>(b: &H, build_hasher: &B) -> u64 { + build_hasher.hash_one(b) +} + +#[cfg(not(feature = "specialize"))] +#[allow(unused)] // False positive +fn hash<H: Hash, B: BuildHasher>(b: &H, build_hasher: &B) -> u64 { + let mut hasher = build_hasher.build_hasher(); + b.hash(&mut hasher); + hasher.finish() +} + +#[test] +fn test_bucket_distribution() { + let build_hasher = RandomState::with_seeds(1, 2, 3, 4); + test_hash_common_words(&build_hasher); + let sequence: Vec<_> = (0..320000).collect(); + check_for_collisions(&build_hasher, &sequence, 32); + let sequence: Vec<_> = (0..2560000).collect(); + check_for_collisions(&build_hasher, &sequence, 256); + let sequence: Vec<_> = (0..320000).map(|i| i * 1024).collect(); + check_for_collisions(&build_hasher, &sequence, 32); + let sequence: Vec<_> = (0..2560000_u64).map(|i| i * 1024).collect(); + check_for_collisions(&build_hasher, &sequence, 256); +} + +#[cfg(feature = "std")] +#[test] +fn test_ahash_alias_map_construction() { + let mut map = ahash::HashMap::default(); + map.insert(1, "test"); + use ahash::HashMapExt; + let mut map = ahash::HashMap::with_capacity(1234); + map.insert(1, "test"); +} + +#[cfg(feature = "std")] +#[test] +fn test_ahash_alias_set_construction() { + let mut set = ahash::HashSet::default(); + set.insert(1); + + use ahash::HashSetExt; + let mut set = ahash::HashSet::with_capacity(1235); + set.insert(1); +} + +fn ahash_vec<H: Hash>(b: &Vec<H>) -> u64 { + let mut total: u64 = 0; + for item in b { + let mut hasher = RandomState::with_seeds(12, 34, 56, 78).build_hasher(); + item.hash(&mut hasher); + total = total.wrapping_add(hasher.finish()); + } + total +} + +fn fxhash_vec<H: Hash>(b: &Vec<H>) -> u64 { + let mut total: u64 = 0; + for item in b { + let mut hasher = FxHasher::default(); + item.hash(&mut hasher); + total = total.wrapping_add(hasher.finish()); + } + total +} + +fn bench_ahash_words(c: &mut Criterion) { + let words = gen_word_pairs(); + c.bench_function("aes_words", |b| b.iter(|| black_box(ahash_vec(&words)))); +} + +fn bench_fx_words(c: &mut Criterion) { + let words = gen_word_pairs(); + c.bench_function("fx_words", |b| b.iter(|| black_box(fxhash_vec(&words)))); +} + +criterion_main!(benches); +criterion_group!(benches, bench_ahash_words, bench_fx_words,); diff --git a/tests/nopanic.rs b/tests/nopanic.rs new file mode 100644 index 0000000..56f754c --- /dev/null +++ b/tests/nopanic.rs @@ -0,0 +1,81 @@ +use ahash::{AHasher, RandomState}; +use std::hash::{BuildHasher, Hash, Hasher}; + +#[macro_use] +extern crate no_panic; + +#[inline(never)] +#[no_panic] +fn hash_test_final(num: i32, string: &str) -> (u64, u64) { + use core::hash::Hasher; + let mut hasher1 = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + let mut hasher2 = RandomState::with_seeds(3, 4, 5, 6).build_hasher(); + hasher1.write_i32(num); + hasher2.write(string.as_bytes()); + (hasher1.finish(), hasher2.finish()) +} + +#[inline(never)] +fn hash_test_final_wrapper(num: i32, string: &str) { + hash_test_final(num, string); +} + +struct SimpleBuildHasher { + hasher: AHasher, +} + +impl SimpleBuildHasher { + fn hash_one<T: Hash>(&self, x: T) -> u64 + where + Self: Sized, + { + let mut hasher = self.build_hasher(); + x.hash(&mut hasher); + hasher.finish() + } +} + +impl BuildHasher for SimpleBuildHasher { + type Hasher = AHasher; + + fn build_hasher(&self) -> Self::Hasher { + self.hasher.clone() + } +} + +#[inline(never)] +#[no_panic] +fn hash_test_specialize(num: i32, string: &str) -> (u64, u64) { + let hasher1 = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + let hasher2 = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + ( + SimpleBuildHasher { hasher: hasher1 }.hash_one(num), + SimpleBuildHasher { hasher: hasher2 }.hash_one(string.as_bytes()), + ) +} + +#[inline(never)] +fn hash_test_random_wrapper(num: i32, string: &str) { + hash_test_specialize(num, string); +} + +#[inline(never)] +#[no_panic] +fn hash_test_random(num: i32, string: &str) -> (u64, u64) { + let build_hasher1 = RandomState::with_seeds(1, 2, 3, 4); + let build_hasher2 = RandomState::with_seeds(1, 2, 3, 4); + (build_hasher1.hash_one(&num), build_hasher2.hash_one(string.as_bytes())) +} + +#[inline(never)] +fn hash_test_specialize_wrapper(num: i32, string: &str) { + hash_test_specialize(num, string); +} + +#[test] +fn test_no_panic() { + hash_test_final_wrapper(2, "Foo"); + hash_test_specialize_wrapper(2, "Bar"); + hash_test_random(2, "Baz"); + hash_test_random_wrapper(2, "Bat"); +} |