summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJinWang An <jinwang.an@samsung.com>2021-08-03 16:30:32 +0900
committerJinWang An <jinwang.an@samsung.com>2021-08-03 16:30:32 +0900
commit6b6c8464f9afa1913fc25a79ce78ac9a712f3723 (patch)
tree47eaacea5a0fb98c7a2b28c83fb897174bad5cdd
parent72bde18b29fe5c0badacd150129c413546aeaecb (diff)
downloadccache-6b6c8464f9afa1913fc25a79ce78ac9a712f3723.tar.gz
ccache-6b6c8464f9afa1913fc25a79ce78ac9a712f3723.tar.bz2
ccache-6b6c8464f9afa1913fc25a79ce78ac9a712f3723.zip
Imported Upstream version 4.2upstream/4.2
-rw-r--r--.github/ISSUE_TEMPLATE/bug_report.md2
-rw-r--r--.github/ISSUE_TEMPLATE/config.yml4
-rw-r--r--.github/ISSUE_TEMPLATE/feature_request.md2
-rw-r--r--.github/ISSUE_TEMPLATE/improvement.md2
-rw-r--r--.github/ISSUE_TEMPLATE/support.md10
-rw-r--r--.github/workflows/build.yaml100
-rw-r--r--.github/workflows/codeql-analysis.yaml49
-rw-r--r--.mailmap1
-rw-r--r--CMakeLists.txt22
-rw-r--r--CONTRIBUTING.md21
-rw-r--r--LICENSE.adoc66
-rw-r--r--README.md1
-rwxr-xr-xci/build-and-verify-source-package3
-rw-r--r--cmake/CIBuildType.cmake4
-rw-r--r--cmake/CcachePackConfig.cmake4
-rw-r--r--cmake/CcacheVersion.cmake30
-rw-r--r--cmake/CheckAsmCompilerFlag.cmake62
-rw-r--r--cmake/DefaultBuildType.cmake2
-rw-r--r--cmake/DevModeWarnings.cmake136
-rw-r--r--cmake/Findzstd.cmake2
-rw-r--r--cmake/GenerateConfigurationFile.cmake27
-rw-r--r--cmake/GenerateVersionFile.cmake2
-rw-r--r--cmake/StandardWarnings.cmake158
-rw-r--r--cmake/UseCcache.cmake69
-rw-r--r--cmake/UseFastestLinker.cmake34
-rw-r--r--cmake/config.h.in10
-rw-r--r--cmake/version.cpp.in2
-rw-r--r--doc/AUTHORS.adoc6
-rw-r--r--doc/CMakeLists.txt10
-rw-r--r--doc/INSTALL.md2
-rw-r--r--doc/MANUAL.adoc282
-rw-r--r--doc/NEWS.adoc110
-rwxr-xr-xmisc/format-files2
-rw-r--r--src/.clang-tidy8
-rw-r--r--src/CMakeLists.txt5
-rw-r--r--src/CacheFile.hpp6
-rw-r--r--src/Config.cpp14
-rw-r--r--src/Config.hpp32
-rw-r--r--src/Context.cpp10
-rw-r--r--src/Context.hpp3
-rw-r--r--src/Counters.cpp2
-rw-r--r--src/Depfile.hpp2
-rw-r--r--src/Hash.cpp4
-rw-r--r--src/InodeCache.cpp97
-rw-r--r--src/InodeCache.hpp7
-rw-r--r--src/Lockfile.cpp2
-rw-r--r--src/Logging.cpp7
-rw-r--r--src/Manifest.cpp6
-rw-r--r--src/NonCopyable.hpp8
-rw-r--r--src/Result.cpp2
-rw-r--r--src/ResultRetriever.cpp63
-rw-r--r--src/SignalHandler.cpp7
-rw-r--r--src/SignalHandler.hpp4
-rw-r--r--src/Sloppiness.hpp41
-rw-r--r--src/Statistic.hpp58
-rw-r--r--src/Statistics.hpp40
-rw-r--r--src/TemporaryFile.cpp33
-rw-r--r--src/Util.cpp120
-rw-r--r--src/Util.hpp20
-rw-r--r--src/argprocessing.cpp35
-rw-r--r--src/argprocessing.hpp10
-rw-r--r--src/ccache.cpp208
-rw-r--r--src/ccache.hpp20
-rw-r--r--src/cleanup.cpp44
-rw-r--r--src/compress.cpp32
-rw-r--r--src/exceptions.hpp36
-rw-r--r--src/hashutil.cpp14
-rw-r--r--src/language.cpp25
-rw-r--r--src/language.hpp14
-rw-r--r--src/system.hpp3
-rw-r--r--src/third_party/CMakeLists.txt6
-rw-r--r--src/third_party/blake3/CMakeLists.txt134
-rw-r--r--src/third_party/blake3/blake3.c4
-rw-r--r--src/third_party/blake3/blake3.h2
-rw-r--r--src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm1828
-rw-r--r--src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm2634
-rw-r--r--src/third_party/blake3/blake3_dispatch.c6
-rw-r--r--src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm8
-rw-r--r--src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm2089
-rw-r--r--src/third_party/doctest.h334
-rw-r--r--src/third_party/fmt/core.h446
-rw-r--r--src/third_party/fmt/format-inl.h1894
-rw-r--r--src/third_party/fmt/format.h1157
-rw-r--r--src/third_party/format.cpp34
-rw-r--r--src/third_party/nonstd/optional.hpp249
-rw-r--r--src/third_party/win32/mktemp.c260
-rw-r--r--src/third_party/win32/mktemp.h18
-rw-r--r--test/CMakeLists.txt49
-rwxr-xr-xtest/run61
-rw-r--r--test/suites/base.bash34
-rw-r--r--test/suites/cache_levels.bash2
-rw-r--r--test/suites/cleanup.bash5
-rw-r--r--test/suites/color_diagnostics.bash17
-rw-r--r--test/suites/inode_cache.bash5
-rw-r--r--test/suites/nvcc.bash4
-rw-r--r--test/suites/pch.bash23
-rw-r--r--test/suites/source_date_epoch.bash99
-rw-r--r--test/suites/split_dwarf.bash34
-rw-r--r--unittest/CMakeLists.txt4
-rw-r--r--unittest/test_Config.cpp7
-rw-r--r--unittest/test_Counters.cpp2
-rw-r--r--unittest/test_Lockfile.cpp9
-rw-r--r--unittest/test_Statistics.cpp1
-rw-r--r--unittest/test_Util.cpp133
-rw-r--r--unittest/test_argprocessing.cpp2
-rw-r--r--unittest/test_bsdmkstemp.cpp206
-rw-r--r--unittest/test_ccache.cpp1
107 files changed, 12096 insertions, 1984 deletions
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index d5d0167..5d6c810 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,5 +1,5 @@
---
-name: Bug report
+name: 🐞 Bug report
about: Create a report to help us improve
title: ''
labels: bug
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..e587130
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,4 @@
+contact_links:
+ - name: 🤔 Question or discussion
+ url: https://github.com/ccache/ccache/discussions
+ about: Please go to https://github.com/ccache/ccache/discussions
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index 38887e1..b89d296 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,5 +1,5 @@
---
-name: Feature request
+name: ✨ Feature request
about: Suggest a new feature for this project
title: ''
labels: feature
diff --git a/.github/ISSUE_TEMPLATE/improvement.md b/.github/ISSUE_TEMPLATE/improvement.md
index de24208..542ec0e 100644
--- a/.github/ISSUE_TEMPLATE/improvement.md
+++ b/.github/ISSUE_TEMPLATE/improvement.md
@@ -1,5 +1,5 @@
---
-name: Improvement
+name: 📈 Improvement
about: Suggest an improvement that is neither a bug fix nor a new feature
title: ''
labels: improvement
diff --git a/.github/ISSUE_TEMPLATE/support.md b/.github/ISSUE_TEMPLATE/support.md
deleted file mode 100644
index 9d10686..0000000
--- a/.github/ISSUE_TEMPLATE/support.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: Question
-about: Ask for support or make an enquiry
-title: ''
-labels: support
-assignees: ''
-
----
-### Question ###
-<!-- What do you want help with or know about? -->
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 705f1a7..c222a19 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -7,11 +7,15 @@ env:
CTEST_OUTPUT_ON_FAILURE: ON
VERBOSE: 1
+defaults:
+ run:
+ shell: bash
+
jobs:
build_and_test:
env:
CMAKE_GENERATOR: Ninja
-
+
name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.version }}
runs-on: ${{ matrix.config.os }}
strategy:
@@ -46,15 +50,9 @@ jobs:
compiler: gcc
version: "10"
- # Enable after https://github.com/ccache/ccache/pull/693
- # - os: ubuntu-16.04
- # compiler: clang
- # version: "3.5"
-
- # Enable after https://github.com/ccache/ccache/pull/693
- # - os: ubuntu-16.04
- # compiler: clang
- # version: "5.0"
+ - os: ubuntu-16.04
+ compiler: clang
+ version: "5.0"
- os: ubuntu-16.04
compiler: clang
@@ -91,10 +89,14 @@ jobs:
- name: Install dependencies
run: |
if [ "${{ runner.os }}" = "Linux" ]; then
+ sudo apt-get update
+
+ # Install ld.gold (binutils) and ld.lld on different runs.
+ # Binding to Ubuntu 20 has no special meaning.
if [ "${{ matrix.config.os }}" = "ubuntu-20.04" ]; then
- sudo apt-get install -y ninja-build elfutils libzstd-dev
+ sudo apt-get install -y ninja-build elfutils libzstd-dev lld
else
- sudo apt-get install -y ninja-build elfutils libzstd1-dev
+ sudo apt-get install -y ninja-build elfutils libzstd1-dev binutils
fi
if [ "${{ matrix.config.compiler }}" = "gcc" ]; then
@@ -106,11 +108,12 @@ jobs:
echo "CC=clang-${{ matrix.config.version }}" >> $GITHUB_ENV
echo "CXX=clang++-${{ matrix.config.version }}" >> $GITHUB_ENV
- sudo apt update
sudo apt install -y clang-${{ matrix.config.version }} g++-multilib
fi
elif [ "${{ runner.os }}" = "macOS" ]; then
- brew install ninja
+ HOMEBREW_NO_AUTO_UPDATE=1 HOMEBREW_NO_INSTALL_CLEANUP=1 \
+ brew install ninja
+
if [ "${{ matrix.config.compiler }}" = "gcc" ]; then
brew install gcc@${{ matrix.config.version }}
echo "CC=gcc-${{ matrix.config.version }}" >> $GITHUB_ENV
@@ -139,7 +142,7 @@ jobs:
if: failure()
uses: actions/upload-artifact@v2
with:
- name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-{{ matrix.config.version }}-testdir.tar.xz
+ name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.version }}-testdir.tar.xz
path: testdir.tar.xz
specific_tests:
@@ -192,10 +195,32 @@ jobs:
CC: x86_64-w64-mingw32-gcc-posix
CXX: x86_64-w64-mingw32-g++-posix
ENABLE_CACHE_CLEANUP_TESTS: 1
- CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DCMAKE_SYSTEM_NAME=Windows -DZSTD_FROM_INTERNET=ON -DSTATIC_LINK=ON
+ CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DCMAKE_SYSTEM_NAME=Windows -DZSTD_FROM_INTERNET=ON
RUN_TESTS: unittest-in-wine
apt_get: elfutils mingw-w64 wine
+ - name: Windows VS2019 32-bit
+ os: windows-2019
+ msvc_arch: x64_x86
+ allow_test_failures: true # For now, don't fail the build on failure
+ CC: cl
+ CXX: cl
+ ENABLE_CACHE_CLEANUP_TESTS: 1
+ CMAKE_GENERATOR: Ninja
+ CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DZSTD_FROM_INTERNET=ON
+ TEST_CC: clang -target i686-pc-windows-msvc
+
+ - name: Windows VS2019 64-bit
+ os: windows-2019
+ msvc_arch: x64
+ allow_test_failures: true # For now, don't fail the build on failure
+ CC: cl
+ CXX: cl
+ ENABLE_CACHE_CLEANUP_TESTS: 1
+ CMAKE_GENERATOR: Ninja
+ CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DZSTD_FROM_INTERNET=ON
+ TEST_CC: clang -target x86_64-pc-windows-msvc
+
- name: Clang address & UB sanitizer
os: ubuntu-20.04
CC: clang
@@ -261,15 +286,40 @@ jobs:
- name: Run apt-get
if: matrix.config.apt_get != ''
- run: sudo apt-get install ${{ matrix.config.apt_get }}
+ run: sudo apt-get update && sudo apt-get install ${{ matrix.config.apt_get }}
+
+ - name: Prepare Windows environment (Visual Studio)
+ if: runner.os == 'Windows'
+ uses: ilammy/msvc-dev-cmd@v1.5.0
+ with:
+ arch: ${{ matrix.config.msvc_arch }}
+
+ - name: Prepare Windows environment (Clang)
+ if: runner.os == 'Windows'
+ shell: powershell
+ run: |
+ $ErrorActionPreference = 'Stop'
+
+ # The test suite currently requires that the compiler specified by the
+ # "CC" environment variable is on a path without spaces. Provide that
+ # by creating a junction from ~/opt/llvm to the Visual Studio path.
+ $null = New-Item `
+ -Path "${HOME}\opt\llvm" `
+ -ItemType Junction `
+ -Target "${env:VCINSTALLDIR}\Tools\Llvm\x64" `
+ -Force
+ "Path=${HOME}\opt\llvm\bin;${env:Path}" | `
+ Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
- name: Build and test
+ id: build-and-test
env:
ASAN_OPTIONS: ${{ matrix.config.ASAN_OPTIONS }}
BUILDDIR: ${{ matrix.config.BUILDDIR }}
CC: ${{ matrix.config.CC }}
CCACHE_LOC: ${{ matrix.config.CCACHE_LOC }}
CFLAGS: ${{ matrix.config.CFLAGS }}
+ CMAKE_GENERATOR: ${{ matrix.config.CMAKE_GENERATOR }}
CMAKE_PARAMS: ${{ matrix.config.CMAKE_PARAMS }}
CXX: ${{ matrix.config.CXX }}
CXXFLAGS: ${{ matrix.config.CXXFLAGS }}
@@ -278,15 +328,23 @@ jobs:
LDFLAGS: ${{ matrix.config.LDFLAGS }}
RUN_TESTS: ${{ matrix.config.RUN_TESTS }}
SPECIAL: ${{ matrix.config.SPECIAL }}
- run: ci/build
+ TEST_CC: ${{ matrix.config.TEST_CC }}
+ run: |
+ rc=0
+ ci/build || rc=$?
+ echo "::set-output name=exit_status::$rc"
+ exit $rc
+ # CTest exits with return code 8 on test failure.
+ continue-on-error: ${{ matrix.config.allow_test_failures == true &&
+ steps.build-and-test.outputs.exit_status == 8 }}
- name: Collect testdir from failed tests
- if: failure()
+ if: failure() || steps.build-and-test.outcome == 'failure'
run: ci/collect-testdir
# TODO: in case of build-and-verify-*package the BUILDDIR is set within those scripts.
- name: Upload testdir from failed tests
- if: failure()
+ if: failure() || steps.build-and-test.outcome == 'failure'
uses: actions/upload-artifact@v2
with:
name: ${{ matrix.config.name }} - testdir.tar.xz
@@ -314,7 +372,7 @@ jobs:
uses: actions/checkout@v2
- name: Install codespell
- run: sudo apt-get install codespell
+ run: sudo apt-get update && sudo apt-get install codespell
- name: Run codespell
run: codespell -q 7 -S ".git,LICENSE.adoc,./src/third_party/*" -I misc/codespell-allowlist.txt
diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml
new file mode 100644
index 0000000..972b6b9
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yaml
@@ -0,0 +1,49 @@
+# More info:
+# https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning
+
+name: "CodeQL"
+
+on:
+ push:
+ branches: ["*"]
+ pull_request:
+ # The branches below must be a subset of the branches above
+ branches: ["*"]
+ paths-ignore:
+ - '**/*.adoc'
+ - '**/*.bash'
+ - '**/*.md'
+ schedule:
+ # Full scan once a week
+ - cron: '0 14 * * 3'
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-18.04
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+ with:
+ # We must fetch at least the immediate parents so that if this is
+ # a pull request then we can checkout the head.
+ fetch-depth: 2
+
+ - name: Install dependencies
+ run: sudo apt-get update && sudo apt-get install ninja-build elfutils libzstd1-dev
+
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v1
+ with:
+ languages: cpp
+ queries: +security-and-quality
+
+ - name: Build
+ run: ci/build
+ env:
+ RUN_TESTS: none
+ CMAKE_GENERATOR: Ninja
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v1
diff --git a/.mailmap b/.mailmap
index 0f2fe15..01d7797 100644
--- a/.mailmap
+++ b/.mailmap
@@ -11,6 +11,7 @@ Doug Anderson <dianders@disordat.com>
Erik Flodin <erik@ejohansson.se>
Hongli Lai <hongli@phusion.nl>
Jonny Yu <yingshen.yu@gmail.com>
+Ka Ho Ng <khng300@gmail.com>
Kona Blend <kona8lend@gmail.com>
Leanid Chaika <leanid.chaika@gmail.com>
Luboš Luňák <l.lunak@centrum.cz> <l.lunak@suse.cz>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b36dc3..40e21a5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,11 @@
cmake_minimum_required(VERSION 3.4.3)
-project(ccache LANGUAGES C CXX ASM)
+project(ccache LANGUAGES C CXX)
+if(MSVC)
+ enable_language(ASM_MASM)
+else()
+ enable_language(ASM)
+endif()
set(CMAKE_PROJECT_DESCRIPTION "a fast C/C++ compiler cache")
if(NOT "${CMAKE_CXX_STANDARD}")
@@ -51,6 +56,17 @@ endif()
#
# Settings
#
+include(CcacheVersion)
+
+if("${CCACHE_VERSION_ORIGIN}" STREQUAL git OR DEFINED ENV{CI})
+ set(CCACHE_DEV_MODE ON)
+else()
+ set(CCACHE_DEV_MODE OFF)
+endif()
+message(STATUS "Ccache dev mode: ${CCACHE_DEV_MODE}")
+
+include(UseCcache)
+include(UseFastestLinker)
include(StandardSettings)
include(StandardWarnings)
include(CIBuildType)
@@ -71,7 +87,7 @@ include(GNUInstallDirs)
include(GenerateConfigurationFile)
include(GenerateVersionFile)
-if(HAVE_SYS_MMAN_H)
+if(HAVE_SYS_MMAN_H AND HAVE_PTHREAD_MUTEXATTR_SETPSHARED)
set(INODE_CACHE_SUPPORTED 1)
endif()
@@ -98,7 +114,7 @@ include(CodeAnalysis)
option(ENABLE_TRACING "Enable possibility to use internal ccache tracing" OFF)
if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
- option(STATIC_LINK "Link statically with system libraries" OFF)
+ option(STATIC_LINK "Link statically with system libraries" ON)
endif()
#
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 46315c4..cb35bc0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,8 +6,8 @@ Want to contribute to ccache? Awesome!
There are several options:
-1. Ask a question in the [issue
- tracker](https://github.com/ccache/ccache/issues/new/choose).
+1. Ask a question in
+ [discussions](https://github.com/ccache/ccache/issues/discussions).
2. Post your question to the [mailing
list](https://lists.samba.org/mailman/listinfo/ccache/).
3. Chat in the [Gitter room](https://gitter.im/ccache/ccache).
@@ -39,8 +39,8 @@ Here are some hints to make the process smoother:
avoid potentially wasting time on doing something that may need major rework
to be accepted, or maybe doesn't end up being accepted at all.
* Is your pull request "work in progress", i.e. you don't think that it's ready
- for merging yet but you want early comments and CI test results? Then create
- a draft pull request as described in [this Github blog
+ for merging yet but you want early comments and CI test results? Then create a
+ draft pull request as described in [this Github blog
post](https://github.blog/2019-02-14-introducing-draft-pull-requests/).
* Please follow the ccache's code style (see the section below).
* Consider [A Note About Git Commit
@@ -50,9 +50,9 @@ Here are some hints to make the process smoother:
## Code style
Ccache was written in C99 until 2019 when it started being converted to C++11.
-The conversion is a slow work in progress, which is why there is a lot of
-C-style code left. Please refrain from doing large C to C++ conversions; do it
-little by little.
+The conversion is a slow work in progress, which is why there is some C-style
+code left. Please refrain from doing large C to C++ conversions; do it little by
+little.
Source code formatting is defined by `.clang-format` in the root directory. The
format is loosely based on [LLVM's code formatting
@@ -60,14 +60,15 @@ style](https://llvm.org/docs/CodingStandards.html) with some exceptions. It's
highly recommended to install
[Clang-Format](https://clang.llvm.org/docs/ClangFormat.html) 6.0 or newer and
run `make format` to format changes according to ccache's code style. Or even
-better: set up your editor to run Clang-Format automatically when saving. If
-you don't run Clang-Format then the ccache authors have to do it for you.
+better: set up your editor to run Clang-Format automatically when saving. If you
+don't run Clang-Format then the ccache authors have to do it for you.
Please follow these conventions:
* Use `UpperCamelCase` for types (e.g. classes and structs) and namespaces.
* Use `UPPER_CASE` names for macros and (non-class )enum values.
-* Use `snake_case` for other names (functions, variables, enum class values, etc.).
+* Use `snake_case` for other names (functions, variables, enum class values,
+ etc.).
* Use an `m_` prefix for non-public member variables.
* Use a `g_` prefix for global mutable variables.
* Use a `k_` prefix for global constants.
diff --git a/LICENSE.adoc b/LICENSE.adoc
index 9052403..cbce985 100644
--- a/LICENSE.adoc
+++ b/LICENSE.adoc
@@ -38,7 +38,7 @@ The copyright for ccache as a whole is as follows:
-------------------------------------------------------------------------------
Copyright (C) 2002-2007 Andrew Tridgell
-Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+Copyright (C) 2009-2021 Joel Rosdahl and other contributors
-------------------------------------------------------------------------------
@@ -52,8 +52,8 @@ the GPL: that is, if separated from the ccache sources, they may be usable
under less restrictive terms.
-src/third_party/base32hex.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/base32hex.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
This base32hex implementation comes from
<https://github.com/pmconrad/tinydnssec>.
@@ -75,8 +75,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
-------------------------------------------------------------------------------
-src/third_party/blake3/*.[hcS]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/blake3/blake3_*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is a subset of https://github.com/BLAKE3-team/BLAKE3[BLAKE3] 0.3.7 with
the following license:
@@ -421,12 +421,12 @@ src/third_party/doctest.h
~~~~~~~~~~~~~~~~~~~~~~~~~
This is the single header version of https://github.com/onqtam/doctest[doctest]
-2.4.0 with the following license:
+2.4.4 with the following license:
-------------------------------------------------------------------------------
The MIT License (MIT)
-Copyright (c) 2016-2019 Viktor Kirilov
+Copyright (c) 2016-2020 Viktor Kirilov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -451,7 +451,7 @@ SOFTWARE.
src/third_party/fmt/*.h and src/third_party/format.cpp
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This is a subset of https://fmt.dev[fmt] 7.0.3 with the following license:
+This is a subset of https://fmt.dev[fmt] 7.1.3 with the following license:
-------------------------------------------------------------------------------
Formatting library for C++
@@ -485,8 +485,8 @@ without including the above copyright and permission notices.
-------------------------------------------------------------------------------
-src/third_party/getopt_long.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/getopt_long.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This implementation of `getopt_long()` was copied from
https://www.postgresql.org[PostgreSQL] and has the following license text:
@@ -524,8 +524,8 @@ SUCH DAMAGE.
-------------------------------------------------------------------------------
-src/third_party/minitrace.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/minitrace.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
A library for producing JSON traces suitable for Chrome's built-in trace viewer
(chrome://tracing). Downloaded from <https://github.com/hrydgard/minitrace>.
@@ -558,11 +558,13 @@ SOFTWARE.
src/third_party/nonstd/optional.hpp
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This alternative implementation of `std::optional` was downloaded from
-<https://github.com/martinmoene/optional-lite> and has the following license
-text:
+This is the single header version of
+https://github.com/martinmoene/optional-lite[optional-lite] 3.4.0 with the
+following license:
-------------------------------------------------------------------------------
+Copyright (c) 2014-2018 Martin Moene
+
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
@@ -597,6 +599,8 @@ This alternative implementation of `std::string_view` was downloaded from
text:
-------------------------------------------------------------------------------
+Copyright 2017-2020 by Martin Moene
+
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
@@ -623,8 +627,8 @@ DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------
-src/third_party/win32/getopt.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/win32/getopt.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This implementation of `getopt_long()` for Win32 was taken from
https://www.codeproject.com/Articles/157001/Full-getopt-Port-for-Unicode-and-Multibyte-Microso
@@ -634,8 +638,32 @@ The full license text can be found in LGPL-3.0.txt and at
https://www.gnu.org/licenses/lgpl-3.0.html.
-src/third_party/xxh(ash|_x86dispatch).[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/win32/mktemp.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This implementation of `mkstemp()` for Win32 was adapted from
+<https://github.com/openbsd/src/blob/99b791d14c0f1858d87a0c33b55880fb9b00be66/lib/libc/stdio/mktemp.c>
+and has the folowing license text:
+
+-------------------------------------------------------------------------------
+Copyright (c) 1996-1998, 2008 Theo de Raadt
+Copyright (c) 1997, 2008-2009 Todd C. Miller
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+-------------------------------------------------------------------------------
+
+src/third_party/xxh*
+~~~~~~~~~~~~~~~~~~~~
xxHash - Extremely Fast Hash algorithm. Copied from xxHash v0.8.0 downloaded
from <https://github.com/Cyan4973/xxHash/releases>.
diff --git a/README.md b/README.md
index aa78593..57ae2ca 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ Contributing to ccache
* [Mailing list](https://lists.samba.org/mailman/listinfo/ccache/)
* [Chat](https://gitter.im/ccache/ccache)
* [Bug report info](https://ccache.dev/bugs.html)
+* [Discussions](https://github.com/ccache/ccache/discussions)
* [Issue tracker](https://github.com/ccache/ccache/issues)
* [Help wanted!](https://github.com/ccache/ccache/labels/help%20wanted)
* [Good first issues!](https://github.com/ccache/ccache/labels/good%20first%20issue)
diff --git a/ci/build-and-verify-source-package b/ci/build-and-verify-source-package
index 5a212c2..d74bc15 100755
--- a/ci/build-and-verify-source-package
+++ b/ci/build-and-verify-source-package
@@ -5,6 +5,9 @@
set -eu
+# Unset CI variable to trigger ccache user build mode.
+unset CI
+
# Ninja builds with relative paths so that ccache can be used to cache the build
# without resorting to setting base_dir.
export CMAKE_GENERATOR=Ninja
diff --git a/cmake/CIBuildType.cmake b/cmake/CIBuildType.cmake
index 963bc50..e721614 100644
--- a/cmake/CIBuildType.cmake
+++ b/cmake/CIBuildType.cmake
@@ -25,7 +25,7 @@ set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel CI."
FORCE)
-string(REPLACE -DNDEBUG "" CMAKE_CXX_FLAGS_CI ${CMAKE_CXX_FLAGS_CI})
-string(REPLACE -DNDEBUG "" CMAKE_C_FLAGS_CI ${CMAKE_C_FLAGS_CI})
+string(REGEX REPLACE "[/-]DNDEBUG" "" CMAKE_CXX_FLAGS_CI ${CMAKE_CXX_FLAGS_CI})
+string(REGEX REPLACE "[/-]DNDEBUG" "" CMAKE_C_FLAGS_CI ${CMAKE_C_FLAGS_CI})
string(STRIP ${CMAKE_CXX_FLAGS_CI} CMAKE_CXX_FLAGS_CI)
string(STRIP ${CMAKE_C_FLAGS_CI} CMAKE_C_FLAGS_CI)
diff --git a/cmake/CcachePackConfig.cmake b/cmake/CcachePackConfig.cmake
index daaca30..a35949d 100644
--- a/cmake/CcachePackConfig.cmake
+++ b/cmake/CcachePackConfig.cmake
@@ -6,7 +6,7 @@ if(${CMAKE_VERSION} VERSION_LESS "3.9")
endif()
# From CcacheVersion.cmake.
-set(CPACK_PACKAGE_VERSION ${VERSION})
+set(CPACK_PACKAGE_VERSION ${CCACHE_VERSION})
set(CPACK_VERBATIM_VARIABLES ON)
@@ -18,7 +18,7 @@ endif()
set(
CPACK_PACKAGE_FILE_NAME
- "ccache-${VERSION}-${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}"
+ "ccache-${CCACHE_VERSION}-${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}"
)
include(CPack)
diff --git a/cmake/CcacheVersion.cmake b/cmake/CcacheVersion.cmake
index 0af8f03..386d6ca 100644
--- a/cmake/CcacheVersion.cmake
+++ b/cmake/CcacheVersion.cmake
@@ -1,3 +1,8 @@
+# This script sets two variables:
+#
+# - CCACHE_VERSION (version string)
+# - CCACHE_VERSION_ORIGIN (archive or git)
+#
# There are three main scenarios:
#
# 1. Building from a source code archive generated by "git archive", e.g. the
@@ -13,25 +18,32 @@
# 3. Building from a Git repository. In this case the version will be a proper
# version if building a tagged commit, otherwise "branch.hash(+dirty)". In
# case Git is not available, the version will be "unknown".
+#
+# CCACHE_VERSION_ORIGIN is set to "archive" in scenario 1 and "git" in scenario
+# 3.
-set(version_info "897b6065398b5e80402ae1c51a60a2cefc765ed1 HEAD, tag: v4.1, origin/master, origin/HEAD, master")
+set(version_info "12ecd73fcd8aa7024d5851c1738223b8aff0c6e9 HEAD, tag: v4.2, origin/master, origin/HEAD, master")
if(version_info MATCHES "^([0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f])[0-9a-f]* (.*)")
# Scenario 1.
+ set(CCACHE_VERSION_ORIGIN archive)
+
set(hash "${CMAKE_MATCH_1}")
set(ref_names "${CMAKE_MATCH_2}")
if(ref_names MATCHES "tag: v([^,]+)")
# Tagged commit.
- set(VERSION "${CMAKE_MATCH_1}")
+ set(CCACHE_VERSION "${CMAKE_MATCH_1}")
else()
# Untagged commit.
- set(VERSION "${hash}")
+ set(CCACHE_VERSION "${hash}")
endif()
elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git")
# Scenario 3.
+ set(CCACHE_VERSION_ORIGIN git)
+
find_package(Git QUIET)
if(NOT GIT_FOUND)
- set(VERSION "unknown")
+ set(CCACHE_VERSION "unknown")
message(WARNING "Could not find git")
else()
macro(git)
@@ -43,9 +55,9 @@ elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git")
git(describe --abbrev=8 --dirty)
if(git_stdout MATCHES "^v([^-]+)(-dirty)?$")
- set(VERSION "${CMAKE_MATCH_1}")
+ set(CCACHE_VERSION "${CMAKE_MATCH_1}")
if(NOT "${CMAKE_MATCH_2}" STREQUAL "")
- set(VERSION "${VERSION}+dirty")
+ set(CCACHE_VERSION "${CCACHE_VERSION}+dirty")
endif()
elseif(git_stdout MATCHES "^v[^-]+-[0-9]+-g([0-9a-f]+)(-dirty)?$")
set(hash "${CMAKE_MATCH_1}")
@@ -55,12 +67,14 @@ elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git")
git(rev-parse --abbrev-ref HEAD)
set(branch "${git_stdout}")
- set(VERSION "${branch}.${hash}${dirty}")
+ set(CCACHE_VERSION "${branch}.${hash}${dirty}")
endif() # else: fail below
endif()
endif()
-if(VERSION STREQUAL "")
+if(CCACHE_VERSION STREQUAL "")
# Scenario 2 or unexpected error.
message(SEND_ERROR "Cannot determine Ccache version")
endif()
+
+message(STATUS "Ccache version: ${CCACHE_VERSION}")
diff --git a/cmake/CheckAsmCompilerFlag.cmake b/cmake/CheckAsmCompilerFlag.cmake
deleted file mode 100644
index 07f5f8e..0000000
--- a/cmake/CheckAsmCompilerFlag.cmake
+++ /dev/null
@@ -1,62 +0,0 @@
-include(CMakeCheckCompilerFlagCommonPatterns)
-
-function(check_asm_compiler_flag flag var)
- if(DEFINED "${var}")
- return()
- endif()
-
- set(locale_vars LC_ALL LC_MESSAGES LANG)
- foreach(v IN LISTS locale_vars)
- set(locale_vars_saved_${v} "$ENV{${v}}")
- set(ENV{${v}} C)
- endforeach()
-
- check_compiler_flag_common_patterns(common_patterns)
-
- set(test_file "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.S")
- file(WRITE "${test_file}" ".global main\nmain:\n")
-
- if(NOT CMAKE_REQUIRED_QUIET)
- message(STATUS "Performing Test ${var}")
- endif()
- try_compile(
- ${var}
- "${CMAKE_BINARY_DIR}"
- "${test_file}"
- COMPILE_DEFINITIONS "${flag}"
- OUTPUT_VARIABLE output)
-
- check_compiler_flag_common_patterns(common_fail_patterns)
-
- foreach(regex ${common_fail_patterns})
- if("${output}" MATCHES "${regex}")
- set(${var} 0)
- endif()
- endforeach()
-
- if(${${var}})
- set(${var} 1 CACHE INTERNAL "Test ${var}")
- if(NOT CMAKE_REQUIRED_QUIET)
- message(STATUS "Performing Test ${var} - Success")
- endif()
- file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
- "Performing ASM SOURCE FILE Test ${var} succeeded with the following output:\n"
- "${output}\n"
- "Source file was:\n${test_file}\n")
- else()
- if(NOT CMAKE_REQUIRED_QUIET)
- message(STATUS "Performing Test ${var} - Failed")
- endif()
- set(${var} "" CACHE INTERNAL "Test ${var}")
- file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
- "Performing ASM SOURCE FILE Test ${var} failed with the following output:\n"
- "${output}\n"
- "Source file was:\n${test_file}\n")
- endif()
-
- foreach(v IN LISTS locale_vars)
- set(ENV{${v}} ${locale_vars_saved_${v}})
- endforeach()
-
- set(${var} "${${var}}" PARENT_SCOPE)
-endfunction()
diff --git a/cmake/DefaultBuildType.cmake b/cmake/DefaultBuildType.cmake
index 87b7647..630ecfb 100644
--- a/cmake/DefaultBuildType.cmake
+++ b/cmake/DefaultBuildType.cmake
@@ -6,7 +6,7 @@ endif()
# Default to Release for end user builds (from source archive) and Debug for
# development builds (in a Git repository).
-if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
+if(CCACHE_DEV_MODE)
set(
CMAKE_BUILD_TYPE "Debug"
CACHE STRING "Choose the type of build." FORCE)
diff --git a/cmake/DevModeWarnings.cmake b/cmake/DevModeWarnings.cmake
new file mode 100644
index 0000000..7ff5411
--- /dev/null
+++ b/cmake/DevModeWarnings.cmake
@@ -0,0 +1,136 @@
+include(CheckCXXCompilerFlag)
+
+# check_cxx_compiler_flag caches the result, so a unique variable name is
+# required for every flag to be checked.
+#
+# Parameters:
+#
+# * flag [in], e.g. FLAG
+# * var_name_of_var_name [in], e.g. "TEMP". This is the variable that "HAS_FLAG"
+# will be written to.
+function(generate_unique_has_flag_var_name flag var_name_of_var_name)
+ string(REGEX REPLACE "[=-]" "_" var_name "${flag}")
+ string(TOUPPER "${var_name}" var_name)
+ set(${var_name_of_var_name} "HAS_${var_name}" PARENT_SCOPE)
+endfunction()
+
+macro(add_compile_flag_if_supported_ex varname flag alternative_flag)
+ # has_flag will contain "HAS_$flag" so each flag gets a unique HAS variable.
+ generate_unique_has_flag_var_name("${flag}" "has_flag")
+
+ # Instead of passing "has_flag" this passes the content of has_flag.
+ check_cxx_compiler_flag("${flag}" "${has_flag}")
+
+ if(${${has_flag}})
+ list(APPEND "${varname}" "${flag}")
+ elseif("${alternative_flag}")
+ add_compile_flag_if_supported_ex("${varname}" ${alternative_flag} "")
+ endif()
+endmacro()
+
+macro(add_compile_flag_if_supported varname flag)
+ add_compile_flag_if_supported_ex("${varname}" "${flag}" "")
+endmacro()
+
+set(
+ _clang_gcc_warnings
+ -Wextra
+ -Wnon-virtual-dtor
+ -Wcast-align
+ -Wunused
+ -Woverloaded-virtual
+ -Wpedantic
+
+ # Candidates for enabling in the future:
+ # -Wshadow
+ # -Wold-style-cast
+ # -Wconversion
+ # -Wsign-conversion
+ # -Wnull-dereference
+ # -Wformat=2
+)
+
+# Tested separately as this is not supported by Clang 3.4.
+add_compile_flag_if_supported(_clang_gcc_warnings "-Wdouble-promotion")
+
+if(WARNINGS_AS_ERRORS)
+ list(APPEND _clang_gcc_warnings -Werror)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ list(APPEND CCACHE_COMPILER_WARNINGS ${_clang_gcc_warnings})
+
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
+ list(
+ APPEND
+ CCACHE_COMPILER_WARNINGS
+ -Qunused-arguments
+ -Wno-error=unreachable-code
+ )
+ endif()
+
+ # If compiler supports -Wshadow-field-in-constructor, disable only that.
+ # Otherwise disable shadow.
+ add_compile_flag_if_supported_ex(
+ CCACHE_COMPILER_WARNINGS "-Wno-shadow-field-in-constructor" "-Wno-shadow")
+
+ # Disable C++20 compatibility for now.
+ add_compile_flag_if_supported(CCACHE_COMPILER_WARNINGS "-Wno-c++2a-compat")
+
+ # If compiler supports these warnings they have to be disabled for now.
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-zero-as-null-pointer-constant")
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-undefined-func-template")
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-return-std-move-in-c++11")
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ list(
+ APPEND
+ CCACHE_COMPILER_WARNINGS
+ ${_clang_gcc_warnings}
+
+ # Warn about logical operations being used where bitwise were probably
+ # wanted.
+ -Wlogical-op
+
+ # Candidates for enabling in the future:
+ # -Wduplicated-cond
+ # -Wduplicated-branches
+ # -Wuseless-cast
+ )
+
+ # TODO: Exact version or reason unknown, discovered in Ubuntu 14 Docker test
+ # with GCC 4.8.4
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.5)
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-missing-field-initializers")
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-unused-variable")
+ endif()
+elseif(MSVC)
+ # Remove any warning level flags added by CMake.
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}")
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+ if(WARNINGS_AS_ERRORS)
+ list(APPEND CCACHE_COMPILER_WARNINGS /WX)
+ endif()
+
+ list(
+ APPEND
+ CCACHE_COMPILER_WARNINGS
+ /W4
+ # Ignore bad macro in winbase.h triggered by /Zc:preprocessor:
+ /wd5105
+ # Conversion warnings:
+ /wd4244
+ /wd4245
+ /wd4267
+ # Assignment in conditional:
+ /wd4706
+ # Non-underscore-prefixed POSIX functions:
+ /wd4996
+ )
+endif()
diff --git a/cmake/Findzstd.cmake b/cmake/Findzstd.cmake
index 848348f..0044937 100644
--- a/cmake/Findzstd.cmake
+++ b/cmake/Findzstd.cmake
@@ -6,7 +6,7 @@ if(ZSTD_FROM_INTERNET)
# Although ${zstd_FIND_VERSION} was requested, let's download a newer version.
# Note: The directory structure has changed in 1.3.0; we only support 1.3.0
# and newer.
- set(zstd_version "1.4.5")
+ set(zstd_version "1.4.8")
set(zstd_url https://github.com/facebook/zstd/archive/v${zstd_version}.tar.gz)
set(zstd_dir ${CMAKE_BINARY_DIR}/zstd-${zstd_version})
diff --git a/cmake/GenerateConfigurationFile.cmake b/cmake/GenerateConfigurationFile.cmake
index a21861f..6e6b604 100644
--- a/cmake/GenerateConfigurationFile.cmake
+++ b/cmake/GenerateConfigurationFile.cmake
@@ -30,7 +30,6 @@ set(functions
getopt_long
getpwuid
gettimeofday
- mkstemp
posix_fallocate
realpath
setenv
@@ -57,6 +56,7 @@ check_c_source_compiles(
}
]=]
HAVE_PTHREAD_MUTEX_ROBUST)
+check_function_exists(pthread_mutexattr_setpshared HAVE_PTHREAD_MUTEXATTR_SETPSHARED)
set(CMAKE_REQUIRED_LINK_OPTIONS)
include(CheckStructHasMember)
@@ -67,18 +67,19 @@ check_struct_has_member("struct stat" st_mtim sys/stat.h
check_struct_has_member("struct statfs" f_fstypename sys/mount.h
HAVE_STRUCT_STATFS_F_FSTYPENAME)
-include(CheckCXXCompilerFlag)
-
-# Old GCC versions don't have the required header support.
-# Old Apple Clang versions seem to support -mavx2 but not the target
-# attribute that's used to enable AVX2 for a certain function.
-if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
- OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0))
- message(STATUS "Detected unsupported compiler for HAVE_AVX2 - disabled")
- set(HAVE_AVX2 FALSE)
-else()
- check_cxx_compiler_flag(-mavx2 HAVE_AVX2)
-endif()
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+ [=[
+ #include <immintrin.h>
+ void func() __attribute__((target("avx2")));
+ void func() { _mm256_abs_epi8(_mm256_set1_epi32(42)); }
+ int main()
+ {
+ func();
+ return 0;
+ }
+ ]=]
+ HAVE_AVX2)
list(APPEND CMAKE_REQUIRED_LIBRARIES ws2_32)
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ws2_32)
diff --git a/cmake/GenerateVersionFile.cmake b/cmake/GenerateVersionFile.cmake
index 1517d44..7b2ad0c 100644
--- a/cmake/GenerateVersionFile.cmake
+++ b/cmake/GenerateVersionFile.cmake
@@ -1,6 +1,4 @@
-include(CcacheVersion)
configure_file(
${CMAKE_SOURCE_DIR}/cmake/version.cpp.in
${CMAKE_BINARY_DIR}/src/version.cpp
@ONLY)
-message(STATUS "Ccache version: ${VERSION}")
diff --git a/cmake/StandardWarnings.cmake b/cmake/StandardWarnings.cmake
index 3507737..6a0ca71 100644
--- a/cmake/StandardWarnings.cmake
+++ b/cmake/StandardWarnings.cmake
@@ -2,161 +2,21 @@
# be linked privately by all product and test code, but not by third party code.
add_library(standard_warnings INTERFACE)
-if(IS_DIRECTORY "${CMAKE_SOURCE_DIR}/.git" OR DEFINED ENV{"CI"})
- # Enabled by default for development builds and CI builds.
+if(CCACHE_DEV_MODE)
+ # Enabled by default for developer builds.
option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" TRUE)
else()
- # Disabled by default for end user builds so compilation doesn't fail with new
+ # Disabled by default for user builds so compilation doesn't fail with new
# compilers that may emit new warnings.
option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" FALSE)
endif()
-include(CheckCXXCompilerFlag)
-
-# check_cxx_compiler_flag caches the result, so a unique variable name is
-# required for every flag to be checked.
-#
-# Parameters:
-#
-# * flag [in], e.g. FLAG
-# * var_name_of_var_name [in], e.g. "TEMP". This is the variable that "HAS_FLAG"
-# will be written to.
-function(generate_unique_has_flag_var_name flag var_name_of_var_name)
- string(REGEX REPLACE "[=-]" "_" var_name "${flag}")
- string(TOUPPER "${var_name}" var_name)
- set(${var_name_of_var_name} "HAS_${var_name}" PARENT_SCOPE)
-endfunction()
-
-function(add_target_compile_flag_if_supported_ex target flag alternative_flag)
- # has_flag will contain "HAS_$flag" so each flag gets a unique HAS variable.
- generate_unique_has_flag_var_name("${flag}" "has_flag")
-
- # Instead of passing "has_flag" this passes the content of has_flag.
- check_cxx_compiler_flag("${flag}" "${has_flag}")
-
- if(${${has_flag}})
- target_compile_options(${target} INTERFACE "${flag}")
- elseif("${alternative_flag}")
- add_target_compile_flag_if_supported_ex(${target} ${alternative_flag} "")
- endif()
-endfunction()
-
-# TODO: Is there a better way to provide an optional third argument?
-macro(add_target_compile_flag_if_supported target flag)
- add_target_compile_flag_if_supported_ex("${target}" "${flag}" "")
-endmacro()
-
-set(CLANG_GCC_WARNINGS
- -Wall
- -Wextra
- -Wnon-virtual-dtor
- -Wcast-align
- -Wunused
- -Woverloaded-virtual
- -Wpedantic
-
- # Candidates for enabling in the future:
- # -Wshadow
- # -Wold-style-cast
- # -Wconversion
- # -Wsign-conversion
- # -Wnull-dereference
- # -Wformat=2
-)
-# Tested separately as this is not supported by Clang 3.4.
-add_target_compile_flag_if_supported(standard_warnings "-Wdouble-promotion")
-
-if(WARNINGS_AS_ERRORS)
- set(CLANG_GCC_WARNINGS ${CLANG_GCC_WARNINGS} -Werror)
+if(NOT MSVC)
+ set(CCACHE_COMPILER_WARNINGS -Wall)
endif()
-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
- if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
- set(
- CLANG_GCC_WARNINGS
- ${CLANG_GCC_WARNINGS}
- -Qunused-arguments
- -Wno-error=unreachable-code)
- endif()
-
- target_compile_options(
- standard_warnings
- INTERFACE
- ${CLANG_GCC_WARNINGS}
- -Weverything
- -Wno-c++98-compat-pedantic
- -Wno-c++98-compat
- -Wno-constexpr-not-const
- -Wno-conversion
- -Wno-disabled-macro-expansion
- -Wno-documentation-unknown-command
- -Wno-exit-time-destructors
- -Wno-format-nonliteral
- -Wno-global-constructors
- -Wno-implicit-fallthrough
- -Wno-padded
- -Wno-shadow # Warnings in fmtlib
- -Wno-shorten-64-to-32
- -Wno-sign-conversion
- -Wno-signed-enum-bitfield # Warnings in fmtlib
- -Wno-weak-vtables
- -Wno-old-style-cast)
-
- # If compiler supports -Wshadow-field-in-constructor, disable only that.
- # Otherwise disable shadow.
- add_target_compile_flag_if_supported_ex(
- standard_warnings "-Wno-shadow-field-in-constructor" "-Wno-shadow")
-
- # Disable C++20 compatibility for now.
- add_target_compile_flag_if_supported(standard_warnings "-Wno-c++2a-compat")
-
- # If compiler supports these warnings they have to be disabled for now.
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-zero-as-null-pointer-constant")
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-undefined-func-template")
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-return-std-move-in-c++11")
-elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- target_compile_options(
- standard_warnings
- INTERFACE ${CLANG_GCC_WARNINGS}
- # Warn about logical operations being used where bitwise were probably
- # wanted.
- -Wlogical-op
-
- # Candidates for enabling in the future:
- # -Wduplicated-cond
- # -Wduplicated-branches
- # -Wuseless-cast
- )
-
- # TODO: Exact version or reason unknown, discovered in Ubuntu 14 Docker test
- # with GCC 4.8.4
- if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.5)
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-missing-field-initializers")
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-unused-variable")
- endif()
-elseif(MSVC)
- # Remove any warning level flags added by CMake.
- string(REGEX REPLACE "/W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
- string(REGEX REPLACE "/W[0-4]" "" CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}")
- string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-
- target_compile_options(
- standard_warnings
- INTERFACE
- /W4
- # Ignore bad macro in winbase.h triggered by /Zc:preprocessor
- /wd5105
- # Conversion warnings.
- /wd4244
- /wd4267
- # Assignment in conditional.
- /wd4706
- # Non-underscore-prefixed POSIX functions.
- /wd4996
- )
+if(CCACHE_DEV_MODE)
+ include(DevModeWarnings)
endif()
+
+target_compile_options(standard_warnings INTERFACE ${CCACHE_COMPILER_WARNINGS})
diff --git a/cmake/UseCcache.cmake b/cmake/UseCcache.cmake
new file mode 100644
index 0000000..e89339a
--- /dev/null
+++ b/cmake/UseCcache.cmake
@@ -0,0 +1,69 @@
+# Note: Compiling ccache via ccache is fine because the ccache version installed
+# in the system is used.
+
+# Calls `message(VERBOSE msg)` if and only if VERBOSE is available (since CMake
+# 3.15). Call CMake with --log-level=VERBOSE to view verbose messages.
+function(message_verbose msg)
+ if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15")
+ message(VERBOSE ${msg})
+ endif()
+endfunction()
+
+function(use_ccache)
+ if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+ message(WARNING "use_ccache() disabled, as it is not called from the project top level")
+ return()
+ endif()
+
+ find_program(CCACHE_PROGRAM ccache)
+ if(NOT CCACHE_PROGRAM)
+ message_verbose("Ccache program not found, not enabling ccache for faster recompilation")
+ return()
+ endif()
+
+ message_verbose("Ccache enabled for faster recompilation")
+
+ # Note: This will override any config and environment settings.
+ set(ccache_env
+ # Another option would be CMAKE_BINARY_DIR, but currently only one base
+ # directory is supported.
+ CCACHE_BASEDIR=${CMAKE_SOURCE_DIR}
+
+ # In case of very old ccache versions (pre 3.3).
+ CCACHE_CPP2=true
+ )
+
+ if(CMAKE_GENERATOR MATCHES "Ninja|Makefiles")
+ find_program(ENV_PROGRAM env)
+ if(ENV_PROGRAM)
+ set(env_program ${ENV_PROGRAM}) # faster than "cmake -E env"
+ else()
+ set(env_program ${CMAKE_COMMAND} -E env)
+ endif()
+ foreach(lang IN ITEMS C CXX OBJC OBJCXX CUDA)
+ set(CMAKE_${lang}_COMPILER_LAUNCHER
+ ${env_program} ${ccache_env} ${CCACHE_PROGRAM}
+ PARENT_SCOPE)
+ endforeach()
+ elseif(CMAKE_GENERATOR STREQUAL Xcode)
+ foreach(lang IN ITEMS C CXX)
+ set(launcher ${CMAKE_BINARY_DIR}/launch-${lang})
+ file(WRITE ${launcher} "#!/bin/bash\n\n")
+ foreach(key_val IN LISTS ccache_env)
+ file(APPEND ${launcher} "export ${key_val}\n")
+ endforeach()
+ file(APPEND ${launcher}
+ "exec \"${CCACHE_PROGRAM}\" \"${CMAKE_${lang}_COMPILER}\" \"$@\"\n")
+ execute_process(COMMAND chmod a+rx ${launcher})
+ endforeach()
+ set(CMAKE_XCODE_ATTRIBUTE_CC ${CMAKE_BINARY_DIR}/launch-C PARENT_SCOPE)
+ set(CMAKE_XCODE_ATTRIBUTE_CXX ${CMAKE_BINARY_DIR}/launch-CXX PARENT_SCOPE)
+ set(CMAKE_XCODE_ATTRIBUTE_LD ${CMAKE_BINARY_DIR}/launch-C PARENT_SCOPE)
+ set(CMAKE_XCODE_ATTRIBUTE_LDPLUSPLUS ${CMAKE_BINARY_DIR}/launch-CXX PARENT_SCOPE)
+ endif()
+endfunction()
+
+option(USE_CCACHE "Use ccache to speed up recompilation time" TRUE)
+if(USE_CCACHE)
+ use_ccache()
+endif()
diff --git a/cmake/UseFastestLinker.cmake b/cmake/UseFastestLinker.cmake
new file mode 100644
index 0000000..c96639c
--- /dev/null
+++ b/cmake/UseFastestLinker.cmake
@@ -0,0 +1,34 @@
+# Calls `message(VERBOSE msg)` if and only if VERBOSE is available (since CMake 3.15).
+# Call CMake with --loglevel=VERBOSE to view those messages.
+function(message_verbose msg)
+ if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15")
+ message(VERBOSE ${msg})
+ endif()
+endfunction()
+
+function(use_fastest_linker)
+ if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+ message(WARNING "use_fastest_linker() disabled, as it is not called at the project top level")
+ return()
+ endif()
+
+ find_program(FASTER_LINKER ld.lld)
+ if(NOT FASTER_LINKER)
+ find_program(FASTER_LINKER ld.gold)
+ endif()
+
+ if(FASTER_LINKER)
+ # Note: Compiler flag -fuse-ld requires gcc 9 or clang 3.8.
+ # Instead override CMAKE_CXX_LINK_EXECUTABLE directly.
+ # By default CMake uses the compiler executable for linking.
+ set(CMAKE_CXX_LINK_EXECUTABLE "${FASTER_LINKER} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+ message_verbose("Using ${FASTER_LINKER} linker for faster linking")
+ else()
+ message_verbose("Using default linker")
+ endif()
+endfunction()
+
+option(USE_FASTER_LINKER "Use the lld or gold linker instead of the default for faster linking" TRUE)
+if(USE_FASTER_LINKER)
+ use_fastest_linker()
+endif()
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 28a0706..b4e412f 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -27,11 +27,13 @@
// features on Mac OS X, so we need _DARWIN_C_SOURCE to re-enable them.
#cmakedefine _DARWIN_C_SOURCE
-// Define to activate features from IEEE Stds 1003.1-2001.
+// Define to activate features from IEEE Stds 1003.1-2008.
#define _POSIX_C_SOURCE 200809L
#if defined(__SunOS_5_8) || defined(__SunOS_5_9) || defined(__SunOS_5_10)
# define _XOPEN_SOURCE 500
+#elif defined(__FreeBSD__)
+# define _XOPEN_SOURCE 700
#elif !defined(__SunOS_5_11) && !defined(__APPLE__)
# define _XOPEN_SOURCE
#endif
@@ -83,12 +85,12 @@
// Define if the system has the type "long long".
#cmakedefine HAVE_LONG_LONG
-// Define if you have the "mkstemp" function.
-#cmakedefine HAVE_MKSTEMP
-
// Define if you have the "posix_fallocate.
#cmakedefine HAVE_POSIX_FALLOCATE
+// Define if you have the "pthread_mutexattr_setpshared" function.
+#cmakedefine HAVE_PTHREAD_MUTEXATTR_SETPSHARED
+
// Define if you have the <pwd.h> header file.
#cmakedefine HAVE_PWD_H
diff --git a/cmake/version.cpp.in b/cmake/version.cpp.in
index 291f049..3b87746 100644
--- a/cmake/version.cpp.in
+++ b/cmake/version.cpp.in
@@ -1,2 +1,2 @@
extern const char CCACHE_VERSION[];
-const char CCACHE_VERSION[] = "@VERSION@";
+const char CCACHE_VERSION[] = "@CCACHE_VERSION@";
diff --git a/doc/AUTHORS.adoc b/doc/AUTHORS.adoc
index eb15cef..dc3717c 100644
--- a/doc/AUTHORS.adoc
+++ b/doc/AUTHORS.adoc
@@ -18,6 +18,7 @@ Ccache is a collective work with contributions from many people, including:
* Andrew Stubbs
* Andrew Tridgell
* Arne Hasselbring
+* Azat Khuzhin
* Bernhard Bauer
* Björn Jacke
* Breno Guimaraes
@@ -54,7 +55,9 @@ Ccache is a collective work with contributions from many people, including:
* Jørgen P. Tjernø
* Josh Soref
* Justin Lebar
+* Ka Ho Ng
* Karl Chen
+* Khem Raj
* Kona Blend
* Kovarththanan Rajaratnam
* Lalit Chhabra
@@ -82,6 +85,7 @@ Ccache is a collective work with contributions from many people, including:
* Mizuha Himuraki
* Mostyn Bramley-Moore
* Neil Mushell
+* Nicholas Hutchinson
* Nick Schultz
* Norbert Lange
* Oded Shimon
@@ -108,7 +112,9 @@ Ccache is a collective work with contributions from many people, including:
* Ryan Brown
* Ryan Egesdahl
* Sam Gross
+* Sergei Trofimovich
* Steffen Dettmer
+* Stuart Henderson
* Sumit Jamgade
* Thomas Otto
* Thomas Röfer
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index b5c9f24..dda4703 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -2,7 +2,7 @@ find_program(ASCIIDOC_EXE asciidoc)
mark_as_advanced(ASCIIDOC_EXE) # Don't show in CMake UIs
if(NOT ASCIIDOC_EXE)
- message(WARNING "Could not find asciidoc; documentation will not be generated")
+ message(NOTICE "Could not find asciidoc; documentation will not be generated")
else()
#
# HTML documentation
@@ -15,7 +15,7 @@ else()
COMMAND
${ASCIIDOC_EXE}
-o "${html_file}"
- -a revnumber="${VERSION}"
+ -a revnumber="${CCACHE_VERSION}"
-a toc
-b xhtml11
"${CMAKE_SOURCE_DIR}/${adoc_file}"
@@ -46,7 +46,7 @@ else()
COMMAND
${ASCIIDOC_EXE}
-o -
- -a revnumber=${VERSION}
+ -a revnumber=${CCACHE_VERSION}
-d manpage
-b docbook "${CMAKE_SOURCE_DIR}/doc/MANUAL.adoc"
| perl -pe 's!<literal>\(.*?\)</literal>!<emphasis role="strong">\\1</emphasis>!g'
@@ -58,7 +58,7 @@ else()
COMMAND ${A2X_EXE} --doctype manpage --format manpage MANUAL.xml
MAIN_DEPENDENCY MANUAL.xml
)
- add_custom_target(doc-man-page ALL DEPENDS ccache.1)
+ add_custom_target(doc-man-page DEPENDS ccache.1)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/ccache.1"
DESTINATION "${CMAKE_INSTALL_MANDIR}/man1"
@@ -66,5 +66,5 @@ else()
set(doc_files "${doc_files}" ccache.1)
endif()
- add_custom_target(doc DEPENDS "${doc_files}")
+ add_custom_target(doc ALL DEPENDS "${doc_files}")
endif()
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index 77c2a72..8a22618 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -20,6 +20,8 @@ To build ccache you need:
from the Internet and unpack it in the local binary tree. Ccache will
then be linked statically to the locally built libzstd.
+ To link libzstd statically you can use `-DZSTD_LIBRARY=/path/to/libzstd.a`.
+
Optional:
- GNU Bourne Again SHell (bash) for tests.
diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc
index b7d8912..98aae9a 100644
--- a/doc/MANUAL.adoc
+++ b/doc/MANUAL.adoc
@@ -63,8 +63,8 @@ is being used.
WARNING: The technique of letting ccache masquerade as the compiler works well,
but currently doesn't interact well with other tools that do the same thing.
-See <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER
-COMPILER WRAPPERS>>.
+See _<<_using_ccache_with_other_compiler_wrappers,Using ccache with other
+compiler wrappers>>_.
WARNING: Use a symbolic links for masquerading, not hard links.
@@ -134,11 +134,11 @@ Common options
*`-X`* _LEVEL_, *`--recompress`* _LEVEL_::
- Recompress the cache using compression level _LEVEL_. The level can be an
- integer, with the same semantics as the
- <<config_compression_level,*compression_level*>> configuration setting), or
+ Recompress the cache to level _LEVEL_ using the Zstandard algorithm. The
+ level can be an integer, with the same semantics as the
+ <<config_compression_level,*compression_level*>> configuration option), or
the special value *uncompressed* for no compression. See
- <<_cache_compression,CACHE COMPRESSION>> for more information. This can
+ _<<_cache_compression,Cache compression>>_ for more information. This can
potentionally take a long time since all files in the cache need to be
visited. Only files that are currently compressed with a different level
than _LEVEL_ will be recompressed.
@@ -146,13 +146,13 @@ Common options
*`-o`* _KEY=VALUE_, *`--set-config`* _KEY_=_VALUE_::
Set configuration option _KEY_ to _VALUE_. See
- <<_configuration,CONFIGURATION>> for more information.
+ _<<_configuration,Configuration>>_ for more information.
*`-x`*, *`--show-compression`*::
- Print cache compression statistics. See <<_cache_compression,CACHE
- COMPRESSION>> for more information. This can potentionally take a long time
- since all files in the cache need to be visited.
+ Print cache compression statistics. See _<<_cache_compression,Cache
+ compression>>_ for more information. This can potentionally take a long
+ time since all files in the cache need to be visited.
*`-p`*, *`--show-config`*::
@@ -200,7 +200,7 @@ Options for scripting or debugging
*`-k`* _KEY_, *`--get-config`* _KEY_::
Print the value of configuration option _KEY_. See
- <<_configuration,CONFIGURATION>> for more information.
+ _<<_configuration,Configuration>>_ for more information.
*`--hash-file`* _PATH_::
@@ -209,7 +209,7 @@ Options for scripting or debugging
*`--print-stats`*::
- Print statistics counter IDs and corresponding values machine-parsable
+ Print statistics counter IDs and corresponding values in machine-parsable
(tab-separated) format.
@@ -242,11 +242,11 @@ compiler than what ccache thinks.
Configuration
-------------
-ccache's default behavior can be overridden by settings in configuration files,
+ccache's default behavior can be overridden by options in configuration files,
which in turn can be overridden by environment variables with names starting
with *CCACHE_*. Ccache normally reads configuration from two files: first a
system-level configuration file and secondly a cache-specific configuration
-file. The priority of configuration settings is as follows (where 1 is
+file. The priorities of configuration options are as follows (where 1 is
highest):
1. Environment variables.
@@ -268,13 +268,15 @@ The location of the primary (cache-specific) configuration is determined like
this:
1. If *CCACHE_CONFIGPATH* is set, use that path.
-2. Otherwise, if <<config_cache_dir,*cache_dir*>> (*CCACHE_DIR*) is set then
- use *<ccache_dir>/ccache.conf*.
-3. Otherwise, if there is a legacy *$HOME/.ccache* directory then use
+2. Otherwise, if the environment variable *CCACHE_DIR* is set then use
+ *$CCACHE_DIR/ccache.conf*.
+3. Otherwise, if <<config_cache_dir,*cache_dir*>> is set in the secondary
+ (system-wide) configuration file then use *<cache_dir>/ccache.conf*.
+4. Otherwise, if there is a legacy *$HOME/.ccache* directory then use
*$HOME/.ccache/ccache.conf*.
-4. Otherwise, if *XDG_CONFIG_HOME* is set then use
+5. Otherwise, if *XDG_CONFIG_HOME* is set then use
*$XDG_CONFIG_HOME/ccache/ccache.conf*.
-5. Otherwise, use *%APPDATA%/ccache/ccache.conf* (Windows),
+6. Otherwise, use *%APPDATA%/ccache/ccache.conf* (Windows),
*$HOME/Library/Preferences/ccache/ccache.conf* (macOS) or
*$HOME/.config/ccache/ccache.conf* (other systems).
@@ -282,7 +284,7 @@ this:
Configuration file syntax
~~~~~~~~~~~~~~~~~~~~~~~~~
-Configuration files are in a simple ``key = value'' format, one setting per
+Configuration files are in a simple ``key = value'' format, one option per
line. Lines starting with a hash sign are comments. Blank lines are ignored, as
is whitespace surrounding keys and values. Example:
@@ -331,9 +333,9 @@ option key.
directory, but only absolute paths that begin with *base_dir*. Cache
results can then be shared for compilations in different directories even
if the project uses absolute paths in the compiler command line. See also
- the discussion under <<_compiling_in_different_directories,COMPILING IN
- DIFFERENT DIRECTORIES>>. If set to the empty string (which is the default),
- no rewriting is done.
+ the discussion under _<<_compiling_in_different_directories,Compiling in
+ different directories>>_. If set to the empty string (which is the
+ default), no rewriting is done.
+
A typical path to use as *base_dir* is your home directory or another directory
that is a parent of your project directories. Don't use `/` as the base
@@ -383,16 +385,15 @@ project2 will be a different absolute path.
[[config_cache_dir]] *cache_dir* (*CCACHE_DIR*)::
This option specifies where ccache will keep its cached compiler outputs.
- It will only take effect if set in the system-wide configuration file or as
- an environment variable. The default is *$XDG_CACHE_HOME/ccache* if
- *XDG_CACHE_HOME* is set, otherwise *$HOME/.cache/ccache*. Exception: If the
- legacy directory *$HOME/.ccache* exists then that directory is the default.
-
- See also <<_location_of_the_primary_configuration_file,LOCATION OF THE
- PRIMARY CONFIGURATION FILE>>.
-
- If you want to use another *CCACHE_DIR* value temporarily for one ccache
- invocation you can use the `-d/--directory` command line option instead.
+ The default is *$XDG_CACHE_HOME/ccache* if *XDG_CACHE_HOME* is set,
+ otherwise *$HOME/.cache/ccache*. Exception: If the legacy directory
+ *$HOME/.ccache* exists then that directory is the default.
++
+See also _<<_location_of_the_primary_configuration_file,Location of the primary
+configuration file>>_.
++
+If you want to use another *CCACHE_DIR* value temporarily for one ccache
+invocation you can use the `-d/--directory` command line option instead.
[[config_compiler]] *compiler* (*CCACHE_COMPILER* or (deprecated) *CCACHE_CC*)::
@@ -422,8 +423,8 @@ project2 will be a different absolute path.
don't affect code generation). You should only use *none* if you know what
you are doing.
*string:value*::
- Use *value* as the string to calculate hash from. This can be the compiler
- revision number you retrieved earlier and set here via environment variable.
+ Hash *value*. This can for instance be a compiler revision number or
+ another string that the build system generates to identify the compiler.
_a command string_::
Hash the standard output and standard error output of the specified
command. The string will be split on whitespace to find out the command and
@@ -453,10 +454,10 @@ compiler but another compiler wrapper -- in that case, the default *mtime*
method will hash the mtime and size of the other compiler wrapper, which means
that ccache won't be able to detect a compiler upgrade. Using a suitable
command to identify the compiler is thus safer, but it's also slower, so you
-should consider continue using the *mtime* method in combination with
-the *prefix_command* option if possible. See
-<<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER COMPILER
-WRAPPERS>>.
+should consider continue using the *mtime* method in combination with the
+*prefix_command* option if possible. See
+_<<_using_ccache_with_other_compiler_wrappers,Using ccache with other compiler
+wrappers>>_.
--
--
@@ -483,7 +484,7 @@ WRAPPERS>>.
distcc's "pump" script.
--
-[[config_compression]] *compression* (*CCACHE_COMPRESS* or *CCACHE_NOCOMPRESS*, see <<_boolean_values,Boolean values>> above)::
+[[config_compression]] *compression* (*CCACHE_COMPRESS* or *CCACHE_NOCOMPRESS*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will compress data it puts in the cache. However, this
option has no effect on how files are retrieved from the cache; compressed
@@ -514,17 +515,24 @@ Semantics of *compression_level*:
A positive value corresponds to normal Zstandard compression levels. Lower
levels (e.g. *1*) mean faster compression but worse compression ratio.
Higher levels (e.g. *19*) mean slower compression but better compression
- ratio. The maximum possible value depends on the libzstd version.
- Decompression speed is essentially the same for all levels.
+ ratio. The maximum possible value depends on the libzstd version, but at
+ least up to 19 is available for all versions. Decompression speed is
+ essentially the same for all levels. As a rule of thumb, use level 5 or
+ lower since higher levels may slow down compilations noticeably. Higher
+ levels are however useful when recompressing the cache with command line
+ option *-X/--recompress*.
*< 0*::
- A negative value corresponds to Zstandard's “ultra-fast” compression
+ A negative value corresponds to Zstandard's ``ultra-fast'' compression
levels, which are even faster than level 1 but with less good compression
- ratios. For instance, level *-3* corresponds to “--fast=3” for the *zstd*
- command line tool.
+ ratios. For instance, level *-3* corresponds to ``--fast=3'' for the *zstd*
+ command line tool. In practice, there is little use for levels lower than
+ *-5* or so.
*0* (default)::
The value *0* means that ccache will choose a suitable level, currently
*1*.
--
++
+See the http://zstd.net[Zstandard documentation] for more information.
[[config_cpp_extension]] *cpp_extension* (*CCACHE_EXTENSION*)::
@@ -534,24 +542,38 @@ Semantics of *compression_level*:
compiled, but that sometimes doesn't work. For example, when using the
``aCC'' compiler on HP-UX, set the cpp extension to *i*.
-[[config_debug]] *debug* (*CCACHE_DEBUG* or *CCACHE_NODEBUG*, see <<_boolean_values,Boolean values>> above)::
+[[config_debug]] *debug* (*CCACHE_DEBUG* or *CCACHE_NODEBUG*, see _<<_boolean_values,Boolean values>>_ above)::
If true, enable the debug mode. The debug mode creates per-object debug
files that are helpful when debugging unexpected cache misses. Note however
that ccache performance will be reduced slightly. See
- <<_cache_debugging,debugging>> for more information. The default is false.
+ _<<_cache_debugging,Cache debugging>>_ for more information. The default is
+ false.
-[[config_depend_mode]] *depend_mode* (*CCACHE_DEPEND* or *CCACHE_NODEPEND*, see <<_boolean_values,Boolean values>> above)::
+[[config_debug_dir]] *debug_dir* (*CCACHE_DEBUGDIR*)::
+
+ Specifies where to write per-object debug files if the _<<config_debug,debug
+ mode>>_ is enabled. If set to the empty string, the files will be written
+ next to the object file. If set to a directory, the debug files will be
+ written with full absolute paths in that directory, creating it if needed.
+ The default is the empty string.
+
+ For example, if *debug_dir* is set to `/example`, the current working
+ directory is `/home/user` and the object file is `build/output.o` then the
+ debug log will be written to `/example/home/user/build/output.o.ccache-log`.
+ See also _<<_cache_debugging,Cache debugging>>_.
+
+[[config_depend_mode]] *depend_mode* (*CCACHE_DEPEND* or *CCACHE_NODEPEND*, see _<<_boolean_values,Boolean values>>_ above)::
If true, the depend mode will be used. The default is false. See
- <<_the_depend_mode,THE DEPEND MODE>>.
+ _<<_the_depend_mode,The depend mode>>_.
-[[config_direct_mode]] *direct_mode* (*CCACHE_DIRECT* or *CCACHE_NODIRECT*, see <<_boolean_values,Boolean values>> above)::
+[[config_direct_mode]] *direct_mode* (*CCACHE_DIRECT* or *CCACHE_NODIRECT*, see _<<_boolean_values,Boolean values>>_ above)::
If true, the direct mode will be used. The default is true. See
- <<_the_direct_mode,THE DIRECT MODE>>.
+ _<<_the_direct_mode,The direct mode>>_.
-[[config_disable]] *disable* (*CCACHE_DISABLE* or *CCACHE_NODISABLE*, see <<_boolean_values,Boolean values>> above)::
+[[config_disable]] *disable* (*CCACHE_DISABLE* or *CCACHE_NODISABLE*, see _<<_boolean_values,Boolean values>>_ above)::
When true, ccache will just call the real compiler, bypassing the cache
completely. The default is false.
@@ -562,10 +584,10 @@ Semantics of *compression_level*:
hash sum that identifies the build. The list separator is semicolon on
Windows systems and colon on other systems.
-[[config_file_clone]] *file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see <<_boolean_values,Boolean values>> above)::
+[[config_file_clone]] *file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see _<<_boolean_values,Boolean values>>_ above)::
- If true, ccache will attempt to use file cloning (also known as “copy on
- write”, “CoW” or “reflinks”) to store and fetch cached compiler results.
+ If true, ccache will attempt to use file cloning (also known as ``copy on
+ write'', ``CoW'' or ``reflinks'') to store and fetch cached compiler results.
*file_clone* has priority over <<config_hard_link,*hard_link*>>. The
default is false.
+
@@ -578,7 +600,7 @@ safe to use, but not all file systems support the feature. For such file
systems, ccache will fall back to use plain copying (or hard links if
<<config_hard_link,*hard_link*>> is enabled).
-[[config_hard_link]] *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see <<_boolean_values,Boolean values>> above)::
+[[config_hard_link]] *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will attempt to use hard links to store and fetch cached
object files. The default is false.
@@ -608,7 +630,7 @@ WARNING: Do not enable this option unless you are aware of these caveats:
*file.o* in build tree A as well. This can retrigger relinking in build tree
A even though nothing really has changed.
-[[config_hash_dir]] *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see <<_boolean_values,Boolean values>> above)::
+[[config_hash_dir]] *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see _<<_boolean_values,Boolean values>>_ above)::
If true (which is the default), ccache will include the current working
directory (CWD) in the hash that is used to distinguish two compilations
@@ -616,7 +638,8 @@ WARNING: Do not enable this option unless you are aware of these caveats:
Exception: The CWD will not be included in the hash if
<<config_base_dir,*base_dir*>> is set (and matches the CWD) and the
compiler option *-fdebug-prefix-map* is used. See also the discussion under
- <<_compiling_in_different_directories,COMPILING IN DIFFERENT DIRECTORIES>>.
+ _<<_compiling_in_different_directories,Compiling in different
+ directories>>_.
+
The reason for including the CWD in the hash by default is to prevent a problem
with the storage of the current working directory in the debug info of an
@@ -645,7 +668,7 @@ might be incorrect.
example, `-fmessage-length=*` will match both `-fmessage-length=20` and
`-fmessage-length=70`.
-[[config_inode_cache]] *inode_cache* (*CCACHE_INODECACHE* or *CCACHE_NOINODECACHE*, see <<_boolean_values,Boolean values>> above)::
+[[config_inode_cache]] *inode_cache* (*CCACHE_INODECACHE* or *CCACHE_NOINODECACHE*, see _<<_boolean_values,Boolean values>>_ above)::
If true, enables caching of source file hashes based on device, inode and
timestamps. This will reduce the time spent on hashing included files as
@@ -656,7 +679,7 @@ available on Windows.
+
The feature requires *temporary_dir* to be located on a local filesystem.
-[[config_keep_comments_cpp]] *keep_comments_cpp* (*CCACHE_COMMENTS* or *CCACHE_NOCOMMENTS*, see <<_boolean_values,Boolean values>> above)::
+[[config_keep_comments_cpp]] *keep_comments_cpp* (*CCACHE_COMMENTS* or *CCACHE_NOCOMMENTS*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will not discard the comments before hashing preprocessor
output. This can be used to check documentation with *-Wdocumentation*.
@@ -665,7 +688,7 @@ The feature requires *temporary_dir* to be located on a local filesystem.
Sets the limit when cleaning up. Files are deleted (in LRU order) until the
levels are below the limit. The default is 0.8 (= 80%). See
- <<_automatic_cleanup,AUTOMATIC CLEANUP>> for more information.
+ _<<_automatic_cleanup,Automatic cleanup>>_ for more information.
[[config_log_file]] *log_file* (*CCACHE_LOGFILE*)::
@@ -687,14 +710,14 @@ file in `/etc/rsyslog.d`:
This option specifies the maximum number of files to keep in the cache. Use
0 for no limit (which is the default). See also
- <<_cache_size_management,CACHE SIZE MANAGEMENT>>.
+ _<<_cache_size_management,Cache size management>>_.
[[config_max_size]] *max_size* (*CCACHE_MAXSIZE*)::
This option specifies the maximum size of the cache. Use 0 for no limit.
The default value is 5G. Available suffixes: k, M, G, T (decimal) and Ki,
Mi, Gi, Ti (binary). The default suffix is G. See also
- <<_cache_size_management,CACHE SIZE MANAGEMENT>>.
+ _<<_cache_size_management,Cache size management>>_.
[[config_path]] *path* (*CCACHE_PATH*)::
@@ -704,7 +727,7 @@ file in `/etc/rsyslog.d`:
matching the compiler name in the normal *PATH* that isn't a symbolic link
to ccache itself.
-[[config_pch_external_checksum]] *pch_external_checksum* (*CCACHE_PCH_EXTSUM* or *CCACHE_NOPCH_EXTSUM*, see <<_boolean_values,Boolean values>> above)::
+[[config_pch_external_checksum]] *pch_external_checksum* (*CCACHE_PCH_EXTSUM* or *CCACHE_NOPCH_EXTSUM*, see _<<_boolean_values,Boolean values>>_ above)::
When this option is set, and ccache finds a precompiled header file,
ccache will look for a file with the extension ``.sum'' added
@@ -716,15 +739,15 @@ file in `/etc/rsyslog.d`:
This option adds a list of prefixes (separated by space) to the command
line that ccache uses when invoking the compiler. See also
- <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER
- COMPILER WRAPPERS>>.
+ _<<_using_ccache_with_other_compiler_wrappers,Using ccache with other
+ compiler wrappers>>_.
[[config_prefix_command_cpp]] *prefix_command_cpp* (*CCACHE_PREFIX_CPP*)::
This option adds a list of prefixes (separated by space) to the command
line that ccache uses when invoking the preprocessor.
-[[config_read_only]] *read_only* (*CCACHE_READONLY* or *CCACHE_NOREADONLY*, see <<_boolean_values,Boolean values>> above)::
+[[config_read_only]] *read_only* (*CCACHE_READONLY* or *CCACHE_NOREADONLY*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will attempt to use existing cached results, but it will not
add new results to the cache. Statistics counters will still be updated,
@@ -735,22 +758,22 @@ set <<config_temporary_dir,*temporary_dir*>> since ccache will fail to create
temporary files otherwise. You may also want to set <<config_stats,*stats*>> to
*false* make ccache not even try to update stats files.
-[[config_read_only_direct]] *read_only_direct* (*CCACHE_READONLY_DIRECT* or *CCACHE_NOREADONLY_DIRECT*, see <<_boolean_values,Boolean values>> above)::
+[[config_read_only_direct]] *read_only_direct* (*CCACHE_READONLY_DIRECT* or *CCACHE_NOREADONLY_DIRECT*, see _<<_boolean_values,Boolean values>>_ above)::
Just like <<config_read_only,*read_only*>> except that ccache will only try
to retrieve results from the cache using the direct mode, not the
preprocessor mode. See documentation for <<config_read_only,*read_only*>>
regarding using a read-only ccache directory.
-[[config_recache]] *recache* (*CCACHE_RECACHE* or *CCACHE_NORECACHE*, see <<_boolean_values,Boolean values>> above)::
+[[config_recache]] *recache* (*CCACHE_RECACHE* or *CCACHE_NORECACHE*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will not use any previously stored result. New results will
still be cached, possibly overwriting any pre-existing results.
-[[config_run_second_cpp]] *run_second_cpp* (*CCACHE_CPP2* or *CCACHE_NOCPP2*, see <<_boolean_values,Boolean values>> above)::
+[[config_run_second_cpp]] *run_second_cpp* (*CCACHE_CPP2* or *CCACHE_NOCPP2*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will first run the preprocessor to preprocess the source
- code (see <<_the_preprocessor_mode,THE PREPROCESSOR MODE>>) and then on a
+ code (see _<<_the_preprocessor_mode,The preprocessor mode>>_) and then on a
cache miss run the compiler on the source code to get hold of the object
file. This is the default.
+
@@ -792,11 +815,15 @@ still has to do _some_ preprocessing (like macros).
Ignore ctimes when *file_stat_matches* is enabled. This can be useful when
backdating files' mtimes in a controlled way.
*include_file_ctime*::
- By default, ccache will not cache a file if it includes a header whose
- ctime is too new. This sloppiness disables that check.
+ By default, ccache will not cache a file if it includes a header whose ctime
+ is too new. This sloppiness disables that check. See also
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
*include_file_mtime*::
By default, ccache will not cache a file if it includes a header whose
- mtime is too new. This sloppiness disables that check.
+ mtime is too new. This sloppiness disables that check. See also
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
*locale*::
Ccache includes the environment variables *LANG*, *LC_ALL*, *LC_CTYPE* and
*LC_MESSAGES* in the hash by default since they may affect localization of
@@ -804,12 +831,12 @@ still has to do _some_ preprocessing (like macros).
that.
*pch_defines*::
Be sloppy about **#define**s when precompiling a header file. See
- <<_precompiled_headers,PRECOMPILED HEADERS>> for more information.
+ _<<_precompiled_headers,Precompiled headers>>_ for more information.
*modules*::
By default, ccache will not cache compilations if *-fmodules* is used since
it cannot hash the state of compiler's internal representation of relevant
modules. This sloppiness allows caching in such a case. See
- <<_c_modules,C++ MODULES>> for more information.
+ _<<_c_modules,C++ modules>>_ for more information.
*system_headers*::
By default, ccache will also include all system headers in the manifest.
With this sloppiness set, ccache will only include system headers in the
@@ -819,10 +846,10 @@ still has to do _some_ preprocessing (like macros).
source code.
--
+
-See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more
+See the discussion under _<<_troubleshooting,Troubleshooting>>_ for more
information.
-[[config_stats]] *stats* (*CCACHE_STATS* or *CCACHE_NOSTATS*, see <<_boolean_values,Boolean values>> above)::
+[[config_stats]] *stats* (*CCACHE_STATS* or *CCACHE_NOSTATS*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will update the statistics counters on each compilation.
The default is true.
@@ -900,11 +927,11 @@ Cache compression
-----------------
Ccache will by default compress all data it puts into the cache using the
-compression algorithm Zstandard (zstd) using compression level 1. The algorithm
-is fast enough that there should be little reason to turn off compression to
-gain performance. One exception is if the cache is located on a compressed file
-system, in which case the compression performed by ccache of course is
-redundant. See the documentation for the configuration options
+compression algorithm http://zstd.net[Zstandard] (zstd) using compression level
+1. The algorithm is fast enough that there should be little reason to turn off
+compression to gain performance. One exception is if the cache is located on a
+compressed file system, in which case the compression performed by ccache of
+course is redundant. See the documentation for the configuration options
<<config_compression,*compression*>> and
<<config_compression_level,*compression_level*>> for more information.
@@ -921,11 +948,11 @@ Incompressible data: 3.5 GB
Notes:
-* The “disk blocks” size is the cache size when taking disk block size into
- account. This value should match the “cache size” value from “ccache
- --show-stats”. The other size numbers refer to actual content sizes.
-* “Compressed data” refers to result and manifest files stored in the cache.
-* “Incompressible data” refers to files that are always stored uncompressed
+* The ``disk blocks'' size is the cache size when taking disk block size into
+ account. This value should match the ``cache size'' value from ``ccache
+ --show-stats''. The other size numbers refer to actual content sizes.
+* ``Compressed data'' refers to result and manifest files stored in the cache.
+* ``Incompressible data'' refers to files that are always stored uncompressed
(triggered by enabling <<config_file_clone,*file_clone*>> or
<<config_hard_link,*hard_link*>>) or unknown files (for instance files
created by older ccache versions).
@@ -977,7 +1004,9 @@ No result was found.
Current size of the cache.
| called for link |
-The compiler was called for linking, not compiling.
+The compiler was called for linking, not compiling. Ccache only supports
+compilation of a single file, i.e. calling the compiler with the *-c* option to
+produce a single object file from a single source file.
| called for preprocessing |
The compiler was called for preprocessing, not compiling.
@@ -1089,7 +1118,7 @@ overhead.
If no previous result is detected (i.e., there is a cache miss) using the
direct mode, ccache will fall back to the preprocessor mode unless the *depend
mode* is enabled. In the depend mode, ccache never runs the preprocessor, not
-even on cache misses. Read more in <<_the_depend_mode,THE DEPEND MODE>>
+even on cache misses. Read more in _<<_the_depend_mode,The depend mode>>_
below.
@@ -1202,6 +1231,34 @@ The depend mode will be disabled if any of the following holds:
* The compiler is not generating dependencies using *-MD* or *-MMD*.
+Handling of newly created header files
+--------------------------------------
+
+If modification time (mtime) or status change time (ctime) of one of the include
+files is the same second as the time compilation is being done, ccache disables
+the direct mode (or, in the case of a <<_precompiled_headers,precompiled
+header>>, disables caching completely). This done as a safety measure to avoid a
+race condition (see below).
+
+To be able to use a newly created header files in direct mode (or use a newly
+precompiled header), either:
+
+* create the include file earlier in the build process, or
+* set <<config_sloppiness,*sloppiness*>> to
+ *include_file_ctime,include_file_mtime* if you are willing to take the risk,
+ for instance if you know that your build system is robust enough not to
+ trigger the race condition.
+
+For reference, the race condition mentioned above consists of these events:
+
+1. The preprocessor is run.
+2. An include file is modified by someone.
+3. The new include file is hashed by ccache.
+4. The real compiler is run on the preprocessor's output, which contains data
+ from the old header file.
+5. The wrong object file is stored in the cache.
+
+
Cache debugging
---------------
@@ -1233,16 +1290,20 @@ Log for this object file.
|==============================================================================
+If <<config_debug_dir,*config_dir*>> (environment variable *CCACHE_DEBUGDIR*) is
+set, the files above will be written to that directory with full absolute paths
+instead of next to the object file.
+
In the direct mode, ccache uses the 160 bit BLAKE3 hash of the
*ccache-input-c* + *ccache-input-d* data (where *+* means concatenation), while
the *ccache-input-c* + *ccache-input-p* data is used in the preprocessor mode.
The *ccache-input-text* file is a combined text version of the three
-binary input files. It has three sections (“COMMON”, “DIRECT MODE” and
-“PREPROCESSOR MODE”), which is turn contain annotations that say what kind of
+binary input files. It has three sections (``COMMON'', ``DIRECT MODE'' and
+``PREPROCESSOR MODE''), which is turn contain annotations that say what kind of
data comes next.
-To debug why you don’t get an expected cache hit for an object file, you can do
+To debug why you don't get an expected cache hit for an object file, you can do
something like this:
1. Build with debug mode enabled.
@@ -1302,6 +1363,10 @@ things to make it work properly:
`__TIMESTAMP__` is used when using a precompiled header. Further, it can't
detect changes in **#define**s in the source code because of how
preprocessing works in combination with precompiled headers.
+* You may also want to include *include_file_mtime,include_file_ctime* in
+ <<config_sloppiness,*sloppiness*>>. See
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
* You must either:
+
--
@@ -1429,7 +1494,7 @@ Caveats
-------
* The direct mode fails to pick up new header files in some rare scenarios. See
- <<_the_direct_mode,THE DIRECT MODE>> above.
+ _<<_the_direct_mode,The direct mode>>_ above.
Troubleshooting
@@ -1440,9 +1505,9 @@ General
A general tip for getting information about what ccache is doing is to enable
debug logging by setting the configuration option <<config_debug,*debug*>> (or
-the environment variable *CCACHE_DEBUG*); see <<_cache_debugging,debugging>>
-for more information. Another way of keeping track of what is happening is to
-check the output of *ccache -s*.
+the environment variable *CCACHE_DEBUG*); see _<<_cache_debugging,Cache
+debugging>>_ for more information. Another way of keeping track of what is
+happening is to check the output of *ccache -s*.
Performance
@@ -1472,15 +1537,10 @@ problems and what may be done to increase the hit rate:
*-Wp,-MMD,_path_*, and *-Wp,-D_define_*) is used.
** This was the first compilation with a new value of the
<<config_base_dir,base directory>>.
-** A modification time of one of the include files is too new (created the same
- second as the compilation is being done). This check is made to avoid a race
- condition. To fix this, create the include file earlier in the build
- process, if possible, or set <<config_sloppiness,*sloppiness*>> to
- *include_file_ctime, include_file_mtime* if you are willing to take the risk.
- (The race condition consists of these events: the preprocessor is run; an
- include file is modified by someone; the new include file is hashed by
- ccache; the real compiler is run on the preprocessor's output, which contains
- data from the old header file; the wrong object file is stored in the cache.)
+** A modification or status change time of one of the include files is too new
+ (created the same second as the compilation is being done). See
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
** The `__TIME__` preprocessor macro is (potentially) being used. Ccache turns
off direct mode if `__TIME__` is present in the source code. This is done as
a safety measure since the string indicates that a `__TIME__` macro _may_
@@ -1522,12 +1582,12 @@ problems and what may be done to increase the hit rate:
* If ``unsupported compiler option'' has been incremented, enable debug logging
and check which compiler option was rejected.
* If ``preprocessor error'' has been incremented, one possible reason is that
- precompiled headers are being used. See <<_precompiled_headers,PRECOMPILED
- HEADERS>> for how to remedy this.
+ precompiled headers are being used. See _<<_precompiled_headers,Precompiled
+ headers>>_ for how to remedy this.
* If ``can't use precompiled header'' has been incremented, see
- <<_precompiled_headers,PRECOMPILED HEADERS>>.
-* If ``can't use modules'' has been incremented, see
- <<_c_modules,C++ MODULES>>.
+ _<<_precompiled_headers,Precompiled headers>>_.
+* If ``can't use modules'' has been incremented, see _<<_c_modules,C++
+ modules>>_.
Corrupt object files
diff --git a/doc/NEWS.adoc b/doc/NEWS.adoc
index d3e4a9d..a4b3af8 100644
--- a/doc/NEWS.adoc
+++ b/doc/NEWS.adoc
@@ -1,6 +1,116 @@
Ccache news
===========
+Ccache 4.2
+----------
+Release date: 2021-02-02
+
+New features
+~~~~~~~~~~~~
+
+- Improved calculation of relative paths when using `base_dir` to also consider
+ canonical paths (i.e. paths with dereferenced symlinks) as candidates.
+
+- Added a `debug_dir` (`CCACHE_DEBUGDIR`) configuration setting for specifying a
+ directory for files written in debug mode.
+
+- Added support for compiler option `-x cuda`, understood by Clang.
+
+- The value of the `SOURCE_DATE_EPOCH` variable is now only hashed if it
+ potentially affects the output from ccache. This means that ccache now (like
+ before version 4.0) will be able produce cache hits for source code that
+ doesn't contain `__DATE__` or `__TIME__` macros regardless of the value of
+ `SOURCE_DATE_EPOCH`.
+
+
+Bug fixes
+~~~~~~~~~
+
+- Fixed a bug where a non-Clang compiler would silently accept the
+ Clang-specific `-f(no-)color-diagnostics` option when run via ccache. This
+ confused feature detection made by e.g. CMake.
+
+- Improved creation of temporary files on Windows. Previously, ccache would in
+ practice reuse temporary filenames on said platform resulting in various
+ problems with parallel builds.
+
+- Fixed creation of parent directories when creating a lock file on Windows.
+
+- Fixed a race condition related to removal of temporary files.
+
+- Improved calculation of directory name for a Windows-style path.
+
+- A compilation result is now not stored in the cache if an included
+ preprocessed header file is too new. This fixes a bug where the content of a
+ newly created preprocessed header file could be missing from the hash,
+ resulting in a false positive cache hit.
+
+- Fixed calculation of the split DWARF filename for an object filename with zero
+ or multiple dots.
+
+- Fixed retrieval of the object file the destination is `/dev/null`.
+
+
+Portability and build improvements
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Additional compiler flags like `-Wextra -Werror` are now only added when
+ building ccache in developer mode.
+
+- The developer build mode no longer enables `-Weverything` for Clang.
+
+- `_XOPEN_SOURCE` is now defined appropriately on FreeBSD to fix missing
+ declaration of `isascii`.
+
+- Improved detection of buildability of BLAKE3 assembler files.
+
+- Disabled build of inode cache code on OSes without
+ `pthread_mutexattr_setpshared`, such as OpenBSD.
+
+- Made static linking the default for a Windows MinGW build.
+
+- Removed legacy fallback replacements of `mkstemp` and `realpath`.
+
+- Improved detection of SSE/AVX support.
+
+- Improved detection of support for the AVX2 target attribute.
+
+- Configuration scripts now try to detect and enable BLAKE3's Neon support.
+
+- Made it possible to run the integration test suite on macOS.
+
+- Fixed building of 32-bit unit tests on macOS.
+
+- Made it possible to compile ccache for C++17.
+
+- Fixed printing of 64-bit `time_t` on 32-bit architectures like RISCV32.
+
+- Made sure to only use ASCII characters in the manual's AsciiDoc source code to
+ make it possible to generate documentation in non-UTF8 locales.
+
+- Upgraded to optional-lite 3.4.0, fmt 7.1.3, doctest 2.4.4 and zstd 1.4.8.
+
+- Took steps towards being able to run the test suite on Windows.
+
+
+Documentation
+~~~~~~~~~~~~~
+
+- Improved wording of `compiler_check` string values.
+
+- Improved documentation of compression levels and the `-X/--recompress` option.
+
+- Improved consistency of terms in the manual.
+
+- HTML documentation is now built and installed by default if possible.
+
+- Fixed incorrect documentation of configuration option `cache_dir`.
+
+- Added hint on how to link statically with libzstd.
+
+- Mention that ccache requires the `-c` compiler option.
+
+
Ccache 4.1
----------
Release date: 2020-11-22
diff --git a/misc/format-files b/misc/format-files
index 1c0405b..111705a 100755
--- a/misc/format-files
+++ b/misc/format-files
@@ -46,7 +46,7 @@ for file in "$@"; do
echo "Error: $file not formatted with Clang-Format"
echo 'Run "make format" or apply this diff:'
git diff $cf_color --no-index "$file" "$tmp_file" \
- | sed -r -e "s!^---.*!--- a/$file!" \
+ | sed -E -e "s!^---.*!--- a/$file!" \
-e "s!^\+\+\+.*!+++ b/$file!" \
-e "/diff --/d" -e "/index /d" \
-e "s/.[0-9]*.clang-format.tmp//"
diff --git a/src/.clang-tidy b/src/.clang-tidy
index 0f3fa75..f30529d 100644
--- a/src/.clang-tidy
+++ b/src/.clang-tidy
@@ -13,14 +13,17 @@ Checks: '-*,
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-else-after-return,
+ -readability-named-parameter,
-readability-qualified-auto,
- -readability-magic-numbers,
+ -readability-redundant-declaration,
performance-*,
-performance-unnecessary-value-param,
modernize-*,
-modernize-avoid-c-arrays,
-modernize-pass-by-value,
+ -modernize-return-braced-init-list,
-modernize-use-auto,
+ -modernize-use-default-member-init,
-modernize-use-trailing-return-type,
cppcoreguidelines-*,
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
@@ -40,6 +43,8 @@ Checks: '-*,
-cppcoreguidelines-pro-type-reinterpret-cast,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-narrowing-conversions,
+ -cppcoreguidelines-non-private-member-variables-in-classes,
+ -cppcoreguidelines-special-member-functions,
bugprone-*,
-bugprone-signed-char-misuse,
-bugprone-branch-clone,
@@ -47,6 +52,7 @@ Checks: '-*,
cert-*,
-cert-err34-c,
-cert-dcl50-cpp,
+ -cert-dcl58-cpp,
-cert-err58-cpp,
clang-diagnostic-*,
clang-analyzer-*,
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a88efc9..beefd81 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -52,6 +52,9 @@ if(WIN32)
endif()
add_library(ccache_lib STATIC ${source_files})
+target_compile_definitions(
+ ccache_lib PUBLIC -Dnssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_NONSTD
+)
if(WIN32)
target_link_libraries(ccache_lib PRIVATE ws2_32 "psapi")
@@ -74,6 +77,6 @@ target_link_libraries(
PRIVATE standard_settings standard_warnings ZSTD::ZSTD
Threads::Threads third_party_lib)
-target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} .)
+target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory(third_party)
diff --git a/src/CacheFile.hpp b/src/CacheFile.hpp
index 59c180b..0541068 100644
--- a/src/CacheFile.hpp
+++ b/src/CacheFile.hpp
@@ -21,7 +21,6 @@
#include "system.hpp"
#include "Stat.hpp"
-#include "exceptions.hpp"
#include "third_party/nonstd/optional.hpp"
@@ -34,15 +33,12 @@ public:
explicit CacheFile(const std::string& path);
- CacheFile(const CacheFile&) = delete;
- CacheFile& operator=(const CacheFile&) = delete;
-
const Stat& lstat() const;
const std::string& path() const;
Type type() const;
private:
- const std::string m_path;
+ std::string m_path;
mutable nonstd::optional<Stat> m_stat;
};
diff --git a/src/Config.cpp b/src/Config.cpp
index a2e09ca..68dc3f5 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -20,9 +20,9 @@
#include "AtomicFile.hpp"
#include "Compression.hpp"
+#include "Sloppiness.hpp"
#include "Util.hpp"
#include "assertions.hpp"
-#include "ccache.hpp"
#include "exceptions.hpp"
#include "fmtmacros.hpp"
@@ -52,6 +52,7 @@ enum class ConfigItem {
compression_level,
cpp_extension,
debug,
+ debug_dir,
depend_mode,
direct_mode,
disable,
@@ -92,6 +93,7 @@ const std::unordered_map<std::string, ConfigItem> k_config_key_table = {
{"compression_level", ConfigItem::compression_level},
{"cpp_extension", ConfigItem::cpp_extension},
{"debug", ConfigItem::debug},
+ {"debug_dir", ConfigItem::debug_dir},
{"depend_mode", ConfigItem::depend_mode},
{"direct_mode", ConfigItem::direct_mode},
{"disable", ConfigItem::disable},
@@ -133,6 +135,7 @@ const std::unordered_map<std::string, std::string> k_env_variable_table = {
{"COMPRESSLEVEL", "compression_level"},
{"CPP2", "run_second_cpp"},
{"DEBUG", "debug"},
+ {"DEBUGDIR", "debug_dir"},
{"DEPEND", "depend_mode"},
{"DIR", "cache_dir"},
{"DIRECT", "direct_mode"},
@@ -546,6 +549,9 @@ Config::get_string_value(const std::string& key) const
case ConfigItem::debug:
return format_bool(m_debug);
+ case ConfigItem::debug_dir:
+ return m_debug_dir;
+
case ConfigItem::depend_mode:
return format_bool(m_depend_mode);
@@ -756,6 +762,10 @@ Config::set_item(const std::string& key,
m_debug = parse_bool(value, env_var_key, negate);
break;
+ case ConfigItem::debug_dir:
+ m_debug_dir = value;
+ break;
+
case ConfigItem::depend_mode:
m_depend_mode = parse_bool(value, env_var_key, negate);
break;
diff --git a/src/Config.hpp b/src/Config.hpp
index a5945fa..eb574dc 100644
--- a/src/Config.hpp
+++ b/src/Config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -51,6 +51,7 @@ public:
int8_t compression_level() const;
const std::string& cpp_extension() const;
bool debug() const;
+ const std::string& debug_dir() const;
bool depend_mode() const;
bool direct_mode() const;
bool disable() const;
@@ -83,7 +84,7 @@ public:
void set_cache_dir(const std::string& value);
void set_cpp_extension(const std::string& value);
void set_compiler(const std::string& value);
- void set_compiler_type(CompilerType compiler_type);
+ void set_compiler_type(CompilerType value);
void set_depend_mode(bool value);
void set_debug(bool value);
void set_direct_mode(bool value);
@@ -133,34 +134,35 @@ private:
std::string m_secondary_config_path;
bool m_absolute_paths_in_stderr = false;
- std::string m_base_dir = "";
+ std::string m_base_dir;
std::string m_cache_dir;
- std::string m_compiler = "";
+ std::string m_compiler;
std::string m_compiler_check = "mtime";
CompilerType m_compiler_type = CompilerType::auto_guess;
bool m_compression = true;
int8_t m_compression_level = 0; // Use default level
- std::string m_cpp_extension = "";
+ std::string m_cpp_extension;
bool m_debug = false;
+ std::string m_debug_dir;
bool m_depend_mode = false;
bool m_direct_mode = true;
bool m_disable = false;
- std::string m_extra_files_to_hash = "";
+ std::string m_extra_files_to_hash;
bool m_file_clone = false;
bool m_hard_link = false;
bool m_hash_dir = true;
- std::string m_ignore_headers_in_manifest = "";
- std::string m_ignore_options = "";
+ std::string m_ignore_headers_in_manifest;
+ std::string m_ignore_options;
bool m_inode_cache = false;
bool m_keep_comments_cpp = false;
double m_limit_multiple = 0.8;
- std::string m_log_file = "";
+ std::string m_log_file;
uint64_t m_max_files = 0;
uint64_t m_max_size = 5ULL * 1000 * 1000 * 1000;
- std::string m_path = "";
+ std::string m_path;
bool m_pch_external_checksum = false;
- std::string m_prefix_command = "";
- std::string m_prefix_command_cpp = "";
+ std::string m_prefix_command;
+ std::string m_prefix_command_cpp;
bool m_read_only = false;
bool m_read_only_direct = false;
bool m_recache = false;
@@ -243,6 +245,12 @@ Config::debug() const
return m_debug;
}
+inline const std::string&
+Config::debug_dir() const
+{
+ return m_debug_dir;
+}
+
inline bool
Config::depend_mode() const
{
diff --git a/src/Context.cpp b/src/Context.cpp
index 7706b7d..a7ce450 100644
--- a/src/Context.cpp
+++ b/src/Context.cpp
@@ -56,9 +56,10 @@ Context::register_pending_tmp_file(const std::string& path)
void
Context::unlink_pending_tmp_files_signal_safe()
{
- for (const std::string& path : m_pending_tmp_files) {
+ for (auto it = m_pending_tmp_files.rbegin(); it != m_pending_tmp_files.rend();
+ ++it) {
// Don't call Util::unlink_tmp since its log calls aren't signal safe.
- unlink(path.c_str());
+ unlink(it->c_str());
}
// Don't clear m_pending_tmp_files since this method must be signal safe.
}
@@ -68,8 +69,9 @@ Context::unlink_pending_tmp_files()
{
SignalHandlerBlocker signal_handler_blocker;
- for (const std::string& path : m_pending_tmp_files) {
- Util::unlink_tmp(path, Util::UnlinkLog::ignore_failure);
+ for (auto it = m_pending_tmp_files.rbegin(); it != m_pending_tmp_files.rend();
+ ++it) {
+ Util::unlink_tmp(*it, Util::UnlinkLog::ignore_failure);
}
m_pending_tmp_files.clear();
}
diff --git a/src/Context.hpp b/src/Context.hpp
index 7af0705..c021124 100644
--- a/src/Context.hpp
+++ b/src/Context.hpp
@@ -23,11 +23,12 @@
#include "Args.hpp"
#include "ArgsInfo.hpp"
#include "Config.hpp"
+#include "Counters.hpp"
#include "Digest.hpp"
#include "File.hpp"
#include "MiniTrace.hpp"
#include "NonCopyable.hpp"
-#include "ccache.hpp"
+#include "Sloppiness.hpp"
#ifdef INODE_CACHE_SUPPORTED
# include "InodeCache.hpp"
diff --git a/src/Counters.cpp b/src/Counters.cpp
index 2e1b0e2..1263d9d 100644
--- a/src/Counters.cpp
+++ b/src/Counters.cpp
@@ -18,7 +18,7 @@
#include "Counters.hpp"
-#include "Statistics.hpp"
+#include "Statistic.hpp"
#include "assertions.hpp"
#include <algorithm>
diff --git a/src/Depfile.hpp b/src/Depfile.hpp
index 770f789..7250a4c 100644
--- a/src/Depfile.hpp
+++ b/src/Depfile.hpp
@@ -16,6 +16,8 @@
// this program; if not, write to the Free Software Foundation, Inc., 51
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#pragma once
+
class Context;
class Hash;
diff --git a/src/Hash.cpp b/src/Hash.cpp
index ccc6f7b..61cc5a3 100644
--- a/src/Hash.cpp
+++ b/src/Hash.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -106,7 +106,7 @@ bool
Hash::hash_fd(int fd)
{
return Util::read_fd(
- fd, [=](const void* data, size_t size) { hash(data, size); });
+ fd, [this](const void* data, size_t size) { hash(data, size); });
}
bool
diff --git a/src/InodeCache.cpp b/src/InodeCache.cpp
index a0e97a1..5e473ec 100644
--- a/src/InodeCache.cpp
+++ b/src/InodeCache.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -202,9 +202,13 @@ InodeCache::hash_inode(const std::string& path,
return true;
}
-InodeCache::Bucket*
-InodeCache::acquire_bucket(uint32_t index)
+bool
+InodeCache::with_bucket(const Digest& key_digest,
+ const BucketHandler& bucket_handler)
{
+ uint32_t hash;
+ Util::big_endian_to_int(key_digest.bytes(), hash);
+ const uint32_t index = hash % k_num_buckets;
Bucket* bucket = &m_sr->buckets[index];
int err = pthread_mutex_lock(&bucket->mt);
#ifdef HAVE_PTHREAD_MUTEX_ROBUST
@@ -217,7 +221,7 @@ InodeCache::acquire_bucket(uint32_t index)
LOG(
"Can't consolidate stale mutex at index {}: {}", index, strerror(err));
LOG_RAW("Consider removing the inode cache file if the problem persists");
- return nullptr;
+ return false;
}
LOG("Wiping bucket at index {} because of stale mutex", index);
memset(bucket->entries, 0, sizeof(Bucket::entries));
@@ -227,26 +231,20 @@ InodeCache::acquire_bucket(uint32_t index)
LOG("Failed to lock mutex at index {}: {}", index, strerror(err));
LOG_RAW("Consider removing the inode cache file if problem persists");
++m_sr->errors;
- return nullptr;
+ return false;
}
#ifdef HAVE_PTHREAD_MUTEX_ROBUST
}
#endif
- return bucket;
-}
-
-InodeCache::Bucket*
-InodeCache::acquire_bucket(const Digest& key_digest)
-{
- uint32_t hash;
- Util::big_endian_to_int(key_digest.bytes(), hash);
- return acquire_bucket(hash % k_num_buckets);
-}
-void
-InodeCache::release_bucket(Bucket* bucket)
-{
+ try {
+ bucket_handler(bucket);
+ } catch (...) {
+ pthread_mutex_unlock(&bucket->mt);
+ throw;
+ }
pthread_mutex_unlock(&bucket->mt);
+ return true;
}
bool
@@ -370,31 +368,28 @@ InodeCache::get(const std::string& path,
return false;
}
- Bucket* bucket = acquire_bucket(key_digest);
-
- if (!bucket) {
- return false;
- }
-
bool found = false;
-
- for (uint32_t i = 0; i < k_num_entries; ++i) {
- if (bucket->entries[i].key_digest == key_digest) {
- if (i > 0) {
- Entry tmp = bucket->entries[i];
- memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i);
- bucket->entries[0] = tmp;
- }
-
- file_digest = bucket->entries[0].file_digest;
- if (return_value) {
- *return_value = bucket->entries[0].return_value;
+ const bool success = with_bucket(key_digest, [&](Bucket* const bucket) {
+ for (uint32_t i = 0; i < k_num_entries; ++i) {
+ if (bucket->entries[i].key_digest == key_digest) {
+ if (i > 0) {
+ Entry tmp = bucket->entries[i];
+ memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i);
+ bucket->entries[0] = tmp;
+ }
+
+ file_digest = bucket->entries[0].file_digest;
+ if (return_value) {
+ *return_value = bucket->entries[0].return_value;
+ }
+ found = true;
+ break;
}
- found = true;
- break;
}
+ });
+ if (!success) {
+ return false;
}
- release_bucket(bucket);
LOG("inode cache {}: {}", found ? "hit" : "miss", path);
@@ -404,7 +399,7 @@ InodeCache::get(const std::string& path,
} else {
++m_sr->misses;
}
- LOG("accumulated stats for inode cache: hits={}, misses={}, errors={}",
+ LOG("Accumulated stats for inode cache: hits={}, misses={}, errors={}",
m_sr->hits.load(),
m_sr->misses.load(),
m_sr->errors.load());
@@ -427,22 +422,20 @@ InodeCache::put(const std::string& path,
return false;
}
- Bucket* bucket = acquire_bucket(key_digest);
+ const bool success = with_bucket(key_digest, [&](Bucket* const bucket) {
+ memmove(&bucket->entries[1],
+ &bucket->entries[0],
+ sizeof(Entry) * (k_num_entries - 1));
+
+ bucket->entries[0].key_digest = key_digest;
+ bucket->entries[0].file_digest = file_digest;
+ bucket->entries[0].return_value = return_value;
+ });
- if (!bucket) {
+ if (!success) {
return false;
}
- memmove(&bucket->entries[1],
- &bucket->entries[0],
- sizeof(Entry) * (k_num_entries - 1));
-
- bucket->entries[0].key_digest = key_digest;
- bucket->entries[0].file_digest = file_digest;
- bucket->entries[0].return_value = return_value;
-
- release_bucket(bucket);
-
LOG("inode cache insert: {}", path);
return true;
diff --git a/src/InodeCache.hpp b/src/InodeCache.hpp
index 68d42ae..f2d049a 100644
--- a/src/InodeCache.hpp
+++ b/src/InodeCache.hpp
@@ -22,6 +22,7 @@
#include "config.h"
+#include <functional>
#include <string>
class Config;
@@ -94,13 +95,13 @@ private:
struct Entry;
struct Key;
struct SharedRegion;
+ using BucketHandler = std::function<void(Bucket* bucket)>;
bool mmap_file(const std::string& inode_cache_file);
static bool
hash_inode(const std::string& path, ContentType type, Digest& digest);
- Bucket* acquire_bucket(uint32_t index);
- Bucket* acquire_bucket(const Digest& key_digest);
- static void release_bucket(Bucket* bucket);
+ bool with_bucket(const Digest& key_digest,
+ const BucketHandler& bucket_handler);
static bool create_new_file(const std::string& filename);
bool initialize();
diff --git a/src/Lockfile.cpp b/src/Lockfile.cpp
index 715ffa4..fb9d5e4 100644
--- a/src/Lockfile.cpp
+++ b/src/Lockfile.cpp
@@ -160,7 +160,7 @@ do_acquire_win32(const std::string& lockfile, uint32_t staleness_limit)
error);
if (error == ERROR_PATH_NOT_FOUND) {
// Directory doesn't exist?
- if (Util::create_dir(Util::dir_name(lockfile)) == 0) {
+ if (Util::create_dir(Util::dir_name(lockfile))) {
// OK. Retry.
continue;
}
diff --git a/src/Logging.cpp b/src/Logging.cpp
index 9a5d99b..c6590d8 100644
--- a/src/Logging.cpp
+++ b/src/Logging.cpp
@@ -1,5 +1,5 @@
// Copyright (C) 2002 Andrew Tridgell
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -88,7 +88,10 @@ do_log(string_view message, bool bulk)
if (tm) {
strftime(timestamp, sizeof(timestamp), "%Y-%m-%dT%H:%M:%S", &*tm);
} else {
- snprintf(timestamp, sizeof(timestamp), "%lu", tv.tv_sec);
+ snprintf(timestamp,
+ sizeof(timestamp),
+ "%llu",
+ static_cast<long long unsigned int>(tv.tv_sec));
}
snprintf(prefix,
sizeof(prefix),
diff --git a/src/Manifest.cpp b/src/Manifest.cpp
index 9ee87dc..38aec0c 100644
--- a/src/Manifest.cpp
+++ b/src/Manifest.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -28,8 +28,8 @@
#include "File.hpp"
#include "Hash.hpp"
#include "Logging.hpp"
+#include "Sloppiness.hpp"
#include "StdMakeUnique.hpp"
-#include "ccache.hpp"
#include "fmtmacros.hpp"
#include "hashutil.hpp"
@@ -587,6 +587,8 @@ put(const Config& config,
} catch (const Error& e) {
LOG("Error: {}", e.what());
}
+ } else {
+ LOG_RAW("The entry already exists in the manifest, not adding");
}
return false;
}
diff --git a/src/NonCopyable.hpp b/src/NonCopyable.hpp
index 86004a9..37fe7e7 100644
--- a/src/NonCopyable.hpp
+++ b/src/NonCopyable.hpp
@@ -20,10 +20,10 @@
class NonCopyable
{
-protected:
- NonCopyable() = default;
-
-private:
+public:
NonCopyable(const NonCopyable&) = delete;
NonCopyable& operator=(const NonCopyable&) = delete;
+
+protected:
+ NonCopyable() = default;
};
diff --git a/src/Result.cpp b/src/Result.cpp
index ef8ac74..b20aa62 100644
--- a/src/Result.cpp
+++ b/src/Result.cpp
@@ -27,7 +27,7 @@
#include "File.hpp"
#include "Logging.hpp"
#include "Stat.hpp"
-#include "Statistics.hpp"
+#include "Statistic.hpp"
#include "Util.hpp"
#include "exceptions.hpp"
#include "fmtmacros.hpp"
diff --git a/src/ResultRetriever.cpp b/src/ResultRetriever.cpp
index 77e044d..957bbee 100644
--- a/src/ResultRetriever.cpp
+++ b/src/ResultRetriever.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -40,8 +40,13 @@ ResultRetriever::on_entry_start(uint32_t entry_number,
uint64_t file_len,
nonstd::optional<std::string> raw_file)
{
- std::string dest_path;
+ LOG("Reading {} entry #{} {} ({} bytes)",
+ raw_file ? "raw" : "embedded",
+ entry_number,
+ Result::file_type_to_string(file_type),
+ file_len);
+ std::string dest_path;
m_dest_file_type = file_type;
switch (file_type) {
@@ -50,6 +55,8 @@ ResultRetriever::on_entry_start(uint32_t entry_number,
break;
case FileType::dependency:
+ // Dependency file: Open destination file but accumulate data in m_dest_data
+ // and write it in on_entry_end.
if (m_ctx.args_info.generating_dependencies) {
dest_path = m_ctx.args_info.output_dep;
m_dest_data.reserve(file_len);
@@ -57,8 +64,10 @@ ResultRetriever::on_entry_start(uint32_t entry_number,
break;
case FileType::stderr_output:
+ // Stderr data: Don't open a destination file. Instead accumulate it in
+ // m_dest_data and write it in on_entry_end.
m_dest_data.reserve(file_len);
- return;
+ break;
case FileType::coverage_unmangled:
if (m_ctx.args_info.generating_coverage) {
@@ -92,46 +101,39 @@ ResultRetriever::on_entry_start(uint32_t entry_number,
break;
}
- if (dest_path.empty()) {
- LOG_RAW("Not copying");
+ if (file_type == FileType::stderr_output) {
+ // Written in on_entry_end.
+ } else if (dest_path.empty()) {
+ LOG_RAW("Not writing");
} else if (dest_path == "/dev/null") {
- LOG_RAW("Not copying to /dev/null");
+ LOG_RAW("Not writing to /dev/null");
+ } else if (raw_file) {
+ Util::clone_hard_link_or_copy_file(m_ctx, *raw_file, dest_path, false);
+
+ // Update modification timestamp to save the file from LRU cleanup (and, if
+ // hard-linked, to make the object file newer than the source file).
+ Util::update_mtime(*raw_file);
} else {
- LOG("Retrieving {} file #{} {} ({} bytes)",
- raw_file ? "raw" : "embedded",
- entry_number,
- Result::file_type_to_string(file_type),
- file_len);
-
- if (raw_file) {
- Util::clone_hard_link_or_copy_file(m_ctx, *raw_file, dest_path, false);
-
- // Update modification timestamp to save the file from LRU cleanup (and,
- // if hard-linked, to make the object file newer than the source file).
- Util::update_mtime(*raw_file);
- } else {
- LOG("Copying to {}", dest_path);
- m_dest_fd = Fd(
- open(dest_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666));
- if (!m_dest_fd) {
- throw Error(
- "Failed to open {} for writing: {}", dest_path, strerror(errno));
- }
- m_dest_path = dest_path;
+ LOG("Writing to {}", dest_path);
+ m_dest_fd = Fd(
+ open(dest_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666));
+ if (!m_dest_fd) {
+ throw Error(
+ "Failed to open {} for writing: {}", dest_path, strerror(errno));
}
+ m_dest_path = dest_path;
}
}
void
ResultRetriever::on_entry_data(const uint8_t* data, size_t size)
{
- ASSERT((m_dest_file_type == FileType::stderr_output && !m_dest_fd)
- || (m_dest_file_type != FileType::stderr_output && m_dest_fd));
+ ASSERT(!(m_dest_file_type == FileType::stderr_output && m_dest_fd));
if (m_dest_file_type == FileType::stderr_output
|| (m_dest_file_type == FileType::dependency && !m_dest_path.empty())) {
m_dest_data.append(reinterpret_cast<const char*>(data), size);
- } else {
+ } else if (m_dest_fd) {
try {
Util::write_fd(*m_dest_fd, data, size);
} catch (Error& e) {
@@ -144,6 +146,7 @@ void
ResultRetriever::on_entry_end()
{
if (m_dest_file_type == FileType::stderr_output) {
+ LOG("Writing to file descriptor {}", STDERR_FILENO);
Util::send_to_stderr(m_ctx, m_dest_data);
} else if (m_dest_file_type == FileType::dependency && !m_dest_path.empty()) {
write_dependency_file();
diff --git a/src/SignalHandler.cpp b/src/SignalHandler.cpp
index 37604c2..8a07cc7 100644
--- a/src/SignalHandler.cpp
+++ b/src/SignalHandler.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -18,11 +18,12 @@
#include "SignalHandler.hpp"
-#include "assertions.hpp"
-
#ifndef _WIN32
# include "Context.hpp"
+# include "assertions.hpp"
+
+# include <signal.h> // NOLINT: sigaddset et al are defined in signal.h
namespace {
diff --git a/src/SignalHandler.hpp b/src/SignalHandler.hpp
index 50e7b0e..3ef8847 100644
--- a/src/SignalHandler.hpp
+++ b/src/SignalHandler.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -20,8 +20,6 @@
#include "system.hpp"
-#include "signal.h"
-
class Context;
class SignalHandler
diff --git a/src/Sloppiness.hpp b/src/Sloppiness.hpp
new file mode 100644
index 0000000..bd2078e
--- /dev/null
+++ b/src/Sloppiness.hpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+enum Sloppiness {
+ SLOPPY_INCLUDE_FILE_MTIME = 1 << 0,
+ SLOPPY_INCLUDE_FILE_CTIME = 1 << 1,
+ SLOPPY_TIME_MACROS = 1 << 2,
+ SLOPPY_PCH_DEFINES = 1 << 3,
+ // Allow us to match files based on their stats (size, mtime, ctime), without
+ // looking at their contents.
+ SLOPPY_FILE_STAT_MATCHES = 1 << 4,
+ // Allow us to not include any system headers in the manifest include files,
+ // similar to -MM versus -M for dependencies.
+ SLOPPY_SYSTEM_HEADERS = 1 << 5,
+ // Allow us to ignore ctimes when comparing file stats, so we can fake mtimes
+ // if we want to (it is much harder to fake ctimes, requires changing clock)
+ SLOPPY_FILE_STAT_MATCHES_CTIME = 1 << 6,
+ // Allow us to not include the -index-store-path option in the manifest hash.
+ SLOPPY_CLANG_INDEX_STORE = 1 << 7,
+ // Ignore locale settings.
+ SLOPPY_LOCALE = 1 << 8,
+ // Allow caching even if -fmodules is used.
+ SLOPPY_MODULES = 1 << 9,
+};
diff --git a/src/Statistic.hpp b/src/Statistic.hpp
new file mode 100644
index 0000000..cd6cda6
--- /dev/null
+++ b/src/Statistic.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+// Statistics fields in storage order.
+enum class Statistic {
+ none = 0,
+ compiler_produced_stdout = 1,
+ compile_failed = 2,
+ internal_error = 3,
+ cache_miss = 4,
+ preprocessor_error = 5,
+ could_not_find_compiler = 6,
+ missing_cache_file = 7,
+ preprocessed_cache_hit = 8,
+ bad_compiler_arguments = 9,
+ called_for_link = 10,
+ files_in_cache = 11,
+ cache_size_kibibyte = 12,
+ obsolete_max_files = 13,
+ obsolete_max_size = 14,
+ unsupported_source_language = 15,
+ bad_output_file = 16,
+ no_input_file = 17,
+ multiple_source_files = 18,
+ autoconf_test = 19,
+ unsupported_compiler_option = 20,
+ output_to_stdout = 21,
+ direct_cache_hit = 22,
+ compiler_produced_no_output = 23,
+ compiler_produced_empty_output = 24,
+ error_hashing_extra_file = 25,
+ compiler_check_failed = 26,
+ could_not_use_precompiled_header = 27,
+ called_for_preprocessing = 28,
+ cleanups_performed = 29,
+ unsupported_code_directive = 30,
+ stats_zeroed_timestamp = 31,
+ could_not_use_modules = 32,
+
+ END
+};
diff --git a/src/Statistics.hpp b/src/Statistics.hpp
index 61721b4..34a9982 100644
--- a/src/Statistics.hpp
+++ b/src/Statistics.hpp
@@ -21,6 +21,7 @@
#include "system.hpp"
#include "Counters.hpp"
+#include "Statistic.hpp" // Any reasonable use of Statistics requires the Statistic enum.
#include "third_party/nonstd/optional.hpp"
@@ -29,45 +30,6 @@
class Config;
-// Statistics fields in storage order.
-enum class Statistic {
- none = 0,
- compiler_produced_stdout = 1,
- compile_failed = 2,
- internal_error = 3,
- cache_miss = 4,
- preprocessor_error = 5,
- could_not_find_compiler = 6,
- missing_cache_file = 7,
- preprocessed_cache_hit = 8,
- bad_compiler_arguments = 9,
- called_for_link = 10,
- files_in_cache = 11,
- cache_size_kibibyte = 12,
- obsolete_max_files = 13,
- obsolete_max_size = 14,
- unsupported_source_language = 15,
- bad_output_file = 16,
- no_input_file = 17,
- multiple_source_files = 18,
- autoconf_test = 19,
- unsupported_compiler_option = 20,
- output_to_stdout = 21,
- direct_cache_hit = 22,
- compiler_produced_no_output = 23,
- compiler_produced_empty_output = 24,
- error_hashing_extra_file = 25,
- compiler_check_failed = 26,
- could_not_use_precompiled_header = 27,
- called_for_preprocessing = 28,
- cleanups_performed = 29,
- unsupported_code_directive = 30,
- stats_zeroed_timestamp = 31,
- could_not_use_modules = 32,
-
- END
-};
-
namespace Statistics {
// Read counters from `path`. No lock is acquired.
diff --git a/src/TemporaryFile.cpp b/src/TemporaryFile.cpp
index 0c64f5e..feaa5f1 100644
--- a/src/TemporaryFile.cpp
+++ b/src/TemporaryFile.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -20,6 +20,10 @@
#include "Util.hpp"
+#ifdef _WIN32
+# include "third_party/win32/mktemp.h"
+#endif
+
using nonstd::string_view;
namespace {
@@ -39,30 +43,23 @@ get_umask()
}
#endif
-#ifndef HAVE_MKSTEMP
-// Cheap and nasty mkstemp replacement.
-int
-mkstemp(char* name_template)
-{
-# ifdef __GNUC__
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-# endif
- mktemp(name_template);
-# ifdef __GNUC__
-# pragma GCC diagnostic pop
-# endif
- return open(name_template, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0600);
-}
-#endif
-
} // namespace
TemporaryFile::TemporaryFile(string_view path_prefix)
: path(std::string(path_prefix) + ".XXXXXX")
{
Util::ensure_dir_exists(Util::dir_name(path));
+#ifdef _WIN32
+ // MSVC lacks mkstemp() and Mingw-w64's implementation[1] is problematic, as
+ // it can reuse the names of recently-deleted files unless the caller
+ // remembers to call srand().
+
+ // [1]: <https://github.com/Alexpux/mingw-w64/blob/
+ // d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-crt/misc/mkstemp.c>
+ fd = Fd(bsd_mkstemp(&path[0]));
+#else
fd = Fd(mkstemp(&path[0]));
+#endif
if (!fd) {
throw Fatal(
"Failed to create temporary file for {}: {}", path, strerror(errno));
diff --git a/src/Util.cpp b/src/Util.cpp
index bac6b7d..71de5f9 100644
--- a/src/Util.cpp
+++ b/src/Util.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -290,16 +290,17 @@ clone_hard_link_or_copy_file(const Context& ctx,
#endif
}
if (ctx.config.hard_link()) {
- unlink(dest.c_str());
LOG("Hard linking {} to {}", source, dest);
- int ret = link(source.c_str(), dest.c_str());
- if (ret == 0) {
+ try {
+ Util::hard_link(source, dest);
if (chmod(dest.c_str(), 0444) != 0) {
LOG("Failed to chmod: {}", strerror(errno));
}
return;
+ } catch (const Error& e) {
+ LOG_RAW(e.what());
+ // Fall back to copying.
}
- LOG("Failed to hard link: {}", strerror(errno));
}
LOG("Copying {} to {}", source, dest);
@@ -412,9 +413,19 @@ dir_name(string_view path)
#endif
size_t n = path.find_last_of(delim);
if (n == std::string::npos) {
+ // "foo" -> "."
return ".";
+ } else if (n == 0) {
+ // "/" -> "/" (Windows: or "\\" -> "\\")
+ return path.substr(0, 1);
+#ifdef _WIN32
+ } else if (n == 2 && path[1] == ':') {
+ // Windows: "C:\\foo" -> "C:\\" or "C:/foo" -> "C:/"
+ return path.substr(0, 3);
+#endif
} else {
- return n == 0 ? "/" : path.substr(0, n);
+ // "/dir/foo" -> "/dir" (Windows: or "C:\\dir\\foo" -> "C:\\dir")
+ return path.substr(0, n);
}
}
@@ -643,13 +654,14 @@ get_extension(string_view path)
}
}
-void
+std::vector<CacheFile>
get_level_1_files(const std::string& dir,
- const ProgressReceiver& progress_receiver,
- std::vector<std::shared_ptr<CacheFile>>& files)
+ const ProgressReceiver& progress_receiver)
{
+ std::vector<CacheFile> files;
+
if (!Stat::stat(dir)) {
- return;
+ return files;
}
size_t level_2_directories = 0;
@@ -661,7 +673,7 @@ get_level_1_files(const std::string& dir,
}
if (!is_dir) {
- files.push_back(std::make_shared<CacheFile>(path));
+ files.emplace_back(path);
} else if (path != dir
&& path.find('/', dir.size() + 1) == std::string::npos) {
++level_2_directories;
@@ -670,6 +682,7 @@ get_level_1_files(const std::string& dir,
});
progress_receiver(1.0);
+ return files;
}
std::string
@@ -777,6 +790,30 @@ get_path_in_cache(string_view cache_dir, uint8_t level, string_view name)
return path;
}
+void
+hard_link(const std::string& oldpath, const std::string& newpath)
+{
+ // Assumption: newpath may already exist as a left-over file from a previous
+ // run, but it's only we who can create the file entry now so we don't try to
+ // handle a race between unlink() and link() below.
+ unlink(newpath.c_str());
+
+#ifndef _WIN32
+ if (link(oldpath.c_str(), newpath.c_str()) != 0) {
+ throw Error(
+ "failed to link {} to {}: {}", oldpath, newpath, strerror(errno));
+ }
+#else
+ if (!CreateHardLink(newpath.c_str(), oldpath.c_str(), nullptr)) {
+ DWORD error = GetLastError();
+ throw Error("failed to link {} to {}: {}",
+ oldpath,
+ newpath,
+ Win32Util::error_message(error));
+ }
+#endif
+}
+
bool
is_absolute_path(string_view path)
{
@@ -833,10 +870,12 @@ localtime(optional<time_t> time)
}
std::string
-make_relative_path(const Context& ctx, string_view path)
+make_relative_path(const std::string& base_dir,
+ const std::string& actual_cwd,
+ const std::string& apparent_cwd,
+ nonstd::string_view path)
{
- if (ctx.config.base_dir().empty()
- || !Util::starts_with(path, ctx.config.base_dir())) {
+ if (base_dir.empty() || !Util::starts_with(path, base_dir)) {
return std::string(path);
}
@@ -858,28 +897,36 @@ make_relative_path(const Context& ctx, string_view path)
// The algorithm for computing relative paths below only works for existing
// paths. If the path doesn't exist, find the first ancestor directory that
// does exist and assemble the path again afterwards.
- string_view original_path = path;
- std::string path_suffix;
+
+ std::vector<std::string> relpath_candidates;
+ const auto original_path = path;
Stat path_stat;
while (!(path_stat = Stat::stat(std::string(path)))) {
path = Util::dir_name(path);
}
- path_suffix = std::string(original_path.substr(path.length()));
+ const auto path_suffix = std::string(original_path.substr(path.length()));
+ const auto real_path = Util::real_path(std::string(path));
- std::string path_str(path);
- std::string normalized_path = Util::normalize_absolute_path(path_str);
- std::vector<std::string> relpath_candidates = {
- Util::get_relative_path(ctx.actual_cwd, normalized_path),
- };
- if (ctx.apparent_cwd != ctx.actual_cwd) {
- relpath_candidates.emplace_back(
- Util::get_relative_path(ctx.apparent_cwd, normalized_path));
- // Move best (= shortest) match first:
- if (relpath_candidates[0].length() > relpath_candidates[1].length()) {
- std::swap(relpath_candidates[0], relpath_candidates[1]);
+ const auto add_relpath_candidates = [&](nonstd::string_view path) {
+ const std::string normalized_path = Util::normalize_absolute_path(path);
+ relpath_candidates.push_back(
+ Util::get_relative_path(actual_cwd, normalized_path));
+ if (apparent_cwd != actual_cwd) {
+ relpath_candidates.emplace_back(
+ Util::get_relative_path(apparent_cwd, normalized_path));
}
+ };
+ add_relpath_candidates(path);
+ if (real_path != path) {
+ add_relpath_candidates(real_path);
}
+ // Find best (i.e. shortest existing) match:
+ std::sort(relpath_candidates.begin(),
+ relpath_candidates.end(),
+ [](const std::string& path1, const std::string& path2) {
+ return path1.length() < path2.length();
+ });
for (const auto& relpath : relpath_candidates) {
if (Stat::stat(relpath).same_inode_as(path_stat)) {
return relpath + path_suffix;
@@ -890,6 +937,13 @@ make_relative_path(const Context& ctx, string_view path)
return std::string(original_path);
}
+std::string
+make_relative_path(const Context& ctx, string_view path)
+{
+ return make_relative_path(
+ ctx.config.base_dir(), ctx.actual_cwd, ctx.apparent_cwd, path);
+}
+
bool
matches_dir_prefix_or_file(string_view dir_prefix_or_file, string_view path)
{
@@ -1201,15 +1255,7 @@ real_path(const std::string& path, bool return_empty_on_error)
resolved = buffer;
}
#else
- // Yes, there are such systems. This replacement relies on the fact that when
- // we call x_realpath we only care about symlinks.
- {
- ssize_t len = readlink(path.c_str(), buffer, buffer_size - 1);
- if (len != -1) {
- buffer[len] = 0;
- resolved = buffer;
- }
- }
+# error No realpath function available
#endif
return resolved ? resolved : (return_empty_on_error ? "" : path);
diff --git a/src/Util.hpp b/src/Util.hpp
index 3fbab45..7db8d95 100644
--- a/src/Util.hpp
+++ b/src/Util.hpp
@@ -208,10 +208,9 @@ nonstd::string_view get_extension(nonstd::string_view path);
// Parameters:
// - dir: The directory to traverse recursively.
// - progress_receiver: Function that will be called for progress updates.
-// - files: Found files.
-void get_level_1_files(const std::string& dir,
- const ProgressReceiver& progress_receiver,
- std::vector<std::shared_ptr<CacheFile>>& files);
+std::vector<CacheFile>
+get_level_1_files(const std::string& dir,
+ const ProgressReceiver& progress_receiver);
// Return the current user's home directory, or throw `Fatal` if it can't
// be determined.
@@ -234,6 +233,9 @@ std::string get_path_in_cache(nonstd::string_view cache_dir,
uint8_t level,
nonstd::string_view name);
+// Hard-link `oldpath` to `newpath`. Throws `Error` on error.
+void hard_link(const std::string& oldpath, const std::string& newpath);
+
// Write bytes in big endian order from an integer value.
//
// Parameters:
@@ -306,8 +308,14 @@ bool is_precompiled_header(nonstd::string_view path);
// time of day is used.
nonstd::optional<tm> localtime(nonstd::optional<time_t> time = {});
-// Make a relative path from current working directory to `path` if `path` is
-// under the base directory.
+// Make a relative path from current working directory (either `actual_cwd` or
+// `apparent_cwd`) to `path` if `path` is under `base_dir`.
+std::string make_relative_path(const std::string& base_dir,
+ const std::string& actual_cwd,
+ const std::string& apparent_cwd,
+ nonstd::string_view path);
+
+// Like above but with base directory and apparent/actual CWD taken from `ctx`.
std::string make_relative_path(const Context& ctx, nonstd::string_view path);
// Return whether `path` is equal to `dir_prefix_or_file` or if
diff --git a/src/argprocessing.cpp b/src/argprocessing.cpp
index 1783f9f..612eadd 100644
--- a/src/argprocessing.cpp
+++ b/src/argprocessing.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -48,7 +48,6 @@ struct ArgumentProcessingState
bool found_rewrite_includes = false;
std::string explicit_language; // As specified with -x.
- std::string file_language; // As deduced from file extension.
std::string input_charset_option; // -finput-charset=...
// Is the dependency makefile name overridden with -MF?
@@ -688,6 +687,19 @@ process_arg(Context& ctx,
return nullopt;
}
+ if (config.compiler_type() != CompilerType::clang
+ && (args[i] == "-fcolor-diagnostics"
+ || args[i] == "-fno-color-diagnostics")) {
+ // Special case: If a non-Clang compiler gets -f(no-)color-diagnostics we'll
+ // bail out and just execute the compiler. The reason is that we don't
+ // include -f(no-)color-diagnostics in the hash so there can be a false
+ // cache hit in the following scenario:
+ //
+ // 1. ccache gcc -c example.c # adds a cache entry
+ // 2. ccache gcc -c example.c -fcolor-diagnostics # unexpectedly succeeds
+ return Statistic::unsupported_compiler_option;
+ }
+
if (args[i] == "-fcolor-diagnostics" || args[i] == "-fdiagnostics-color"
|| args[i] == "-fdiagnostics-color=always") {
state.color_diagnostics = ColorDiagnostics::always;
@@ -841,7 +853,7 @@ process_arg(Context& ctx,
}
if (!args_info.input_file.empty()) {
- if (!language_for_file(args[i]).empty()) {
+ if (supported_source_extension(args[i])) {
LOG("Multiple input files: {} and {}", args_info.input_file, args[i]);
return Statistic::multiple_source_files;
} else if (!state.found_c_opt && !state.found_dc_opt) {
@@ -980,7 +992,6 @@ process_args(Context& ctx)
if (!state.explicit_language.empty() && state.explicit_language == "none") {
state.explicit_language.clear();
}
- state.file_language = language_for_file(args_info.input_file);
if (!state.explicit_language.empty()) {
if (!language_is_supported(state.explicit_language)) {
LOG("Unsupported language: {}", state.explicit_language);
@@ -988,7 +999,8 @@ process_args(Context& ctx)
}
args_info.actual_language = state.explicit_language;
} else {
- args_info.actual_language = state.file_language;
+ args_info.actual_language =
+ language_for_file(args_info.input_file, config.compiler_type());
}
args_info.output_is_precompiled_header =
@@ -1021,8 +1033,11 @@ process_args(Context& ctx)
return Statistic::unsupported_source_language;
}
- if (!config.run_second_cpp() && args_info.actual_language == "cu") {
- LOG_RAW("Using CUDA compiler; not compiling preprocessed code");
+ if (!config.run_second_cpp()
+ && (args_info.actual_language == "cu"
+ || args_info.actual_language == "cuda")) {
+ LOG("Source language is \"{}\"; not compiling preprocessed code",
+ args_info.actual_language);
config.set_run_second_cpp(true);
}
@@ -1056,12 +1071,6 @@ process_args(Context& ctx)
}
if (args_info.seen_split_dwarf) {
- size_t pos = args_info.output_obj.rfind('.');
- if (pos == std::string::npos || pos == args_info.output_obj.size() - 1) {
- LOG_RAW("Badly formed object filename");
- return Statistic::bad_compiler_arguments;
- }
-
args_info.output_dwo = Util::change_extension(args_info.output_obj, ".dwo");
}
diff --git a/src/argprocessing.hpp b/src/argprocessing.hpp
index c040c44..a8e8f3a 100644
--- a/src/argprocessing.hpp
+++ b/src/argprocessing.hpp
@@ -19,7 +19,7 @@
#pragma once
#include "Args.hpp"
-#include "Statistics.hpp"
+#include "Statistic.hpp"
#include "third_party/nonstd/optional.hpp"
@@ -27,10 +27,10 @@ class Context;
struct ProcessArgsResult
{
- ProcessArgsResult(Statistic error);
- ProcessArgsResult(const Args& preprocessor_args,
- const Args& extra_args_to_hash,
- const Args& compiler_args);
+ ProcessArgsResult(Statistic error_);
+ ProcessArgsResult(const Args& preprocessor_args_,
+ const Args& extra_args_to_hash_,
+ const Args& compiler_args_);
// nullopt on success, otherwise the statistics counter that should be
// incremented.
diff --git a/src/ccache.cpp b/src/ccache.cpp
index b7c3013..10724c9 100644
--- a/src/ccache.cpp
+++ b/src/ccache.cpp
@@ -1,5 +1,5 @@
// Copyright (C) 2002-2007 Andrew Tridgell
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -40,6 +40,7 @@
#include "ResultExtractor.hpp"
#include "ResultRetriever.hpp"
#include "SignalHandler.hpp"
+#include "Statistics.hpp"
#include "StdMakeUnique.hpp"
#include "TemporaryFile.hpp"
#include "UmaskScope.hpp"
@@ -87,7 +88,7 @@ constexpr const char VERSION_TEXT[] =
R"({} version {}
Copyright (C) 2002-2007 Andrew Tridgell
-Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+Copyright (C) 2009-2021 Joel Rosdahl and other contributors
See <https://ccache.dev/credits.html> for a complete list of contributors.
@@ -119,8 +120,9 @@ Common options:
-M, --max-size SIZE set maximum size of cache to SIZE (use 0 for no
limit); available suffixes: k, M, G, T (decimal)
and Ki, Mi, Gi, Ti (binary); default suffix: G
- -X, --recompress LEVEL recompress the cache to LEVEL (integer level or
- "uncompressed")
+ -X, --recompress LEVEL recompress the cache to level LEVEL (integer or
+ "uncompressed") using the Zstandard algorithm;
+ see "Cache compression" in the manual for details
-o, --set-config KEY=VAL set configuration item KEY to value VAL
-x, --show-compression show compression statistics
-p, --show-config show current configuration options in
@@ -145,7 +147,7 @@ Options for scripting or debugging:
--print-stats print statistics counter IDs and corresponding
values in machine-parsable format
-See also <https://ccache.dev>.
+See also the manual on <https://ccache.dev/documentation.html>.
)";
// How often (in seconds) to scan $CCACHE_DIR/tmp for left-over temporary
@@ -180,6 +182,45 @@ const uint8_t k_max_cache_levels = 4;
// stored in the cache changes in a backwards-incompatible way.
const char HASH_PREFIX[] = "3";
+namespace {
+
+// Throw a Failure if ccache did not succeed in getting or putting a result in
+// the cache. If `exit_code` is set, just exit with that code directly,
+// otherwise execute the real compiler and exit with its exit code. Also updates
+// statistics counter `statistic` if it's not `Statistic::none`.
+class Failure : public std::exception
+{
+public:
+ Failure(Statistic statistic,
+ nonstd::optional<int> exit_code = nonstd::nullopt);
+
+ nonstd::optional<int> exit_code() const;
+ Statistic statistic() const;
+
+private:
+ Statistic m_statistic;
+ nonstd::optional<int> m_exit_code;
+};
+
+inline Failure::Failure(Statistic statistic, nonstd::optional<int> exit_code)
+ : m_statistic(statistic), m_exit_code(exit_code)
+{
+}
+
+inline nonstd::optional<int>
+Failure::exit_code() const
+{
+ return m_exit_code;
+}
+
+inline Statistic
+Failure::statistic() const
+{
+ return m_statistic;
+}
+
+} // namespace
+
static void
add_prefix(const Context& ctx, Args& args, const std::string& prefix_command)
{
@@ -231,10 +272,25 @@ clean_up_internal_tempdir(const Config& config)
});
}
+static std::string
+prepare_debug_path(const std::string& debug_dir,
+ const std::string& output_obj,
+ string_view suffix)
+{
+ const std::string prefix =
+ debug_dir.empty() ? output_obj : debug_dir + Util::real_path(output_obj);
+ try {
+ Util::ensure_dir_exists(Util::dir_name(prefix));
+ } catch (Error&) {
+ // Ignore since we can't handle an error in another way in this context. The
+ // caller takes care of logging when trying to open the path for writing.
+ }
+ return FMT("{}.ccache-{}", prefix, suffix);
+}
+
static void
init_hash_debug(Context& ctx,
Hash& hash,
- string_view obj_path,
char type,
string_view section_name,
FILE* debug_text_file)
@@ -243,7 +299,8 @@ init_hash_debug(Context& ctx,
return;
}
- std::string path = FMT("{}.ccache-input-{}", obj_path, type);
+ const auto path = prepare_debug_path(
+ ctx.config.debug_dir(), ctx.args_info.output_obj, FMT("input-{}", type));
File debug_binary_file(path, "wb");
if (debug_binary_file) {
hash.enable_debug(section_name, debug_binary_file.get(), debug_text_file);
@@ -291,14 +348,39 @@ guess_compiler(string_view path)
}
static bool
+include_file_too_new(const Context& ctx,
+ const std::string& path,
+ const Stat& path_stat)
+{
+ // The comparison using >= is intentional, due to a possible race between
+ // starting compilation and writing the include file. See also the notes under
+ // "Performance" in doc/MANUAL.adoc.
+ if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_MTIME)
+ && path_stat.mtime() >= ctx.time_of_compilation) {
+ LOG("Include file {} too new", path);
+ return true;
+ }
+
+ // The same >= logic as above applies to the change time of the file.
+ if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_CTIME)
+ && path_stat.ctime() >= ctx.time_of_compilation) {
+ LOG("Include file {} ctime too new", path);
+ return true;
+ }
+
+ return false;
+}
+
+// Returns false if the include file was "too new" and therefore should disable
+// the direct mode (or, in the case of a preprocessed header, fall back to just
+// running the real compiler), otherwise true.
+static bool
do_remember_include_file(Context& ctx,
std::string path,
Hash& cpp_hash,
bool system,
Hash* depend_mode_hash)
{
- bool is_pch = false;
-
if (path.length() >= 2 && path[0] == '<' && path[path.length() - 1] == '>') {
// Typically <built-in> or <command-line>.
return true;
@@ -355,26 +437,27 @@ do_remember_include_file(Context& ctx,
}
}
- // The comparison using >= is intentional, due to a possible race between
- // starting compilation and writing the include file. See also the notes
- // under "Performance" in doc/MANUAL.adoc.
- if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_MTIME)
- && st.mtime() >= ctx.time_of_compilation) {
- LOG("Include file {} too new", path);
- return false;
- }
+ const bool is_pch = Util::is_precompiled_header(path);
+ const bool too_new = include_file_too_new(ctx, path, st);
+
+ if (too_new) {
+ // Opt out of direct mode because of a race condition.
+ //
+ // The race condition consists of these events:
+ //
+ // - the preprocessor is run
+ // - an include file is modified by someone
+ // - the new include file is hashed by ccache
+ // - the real compiler is run on the preprocessor's output, which contains
+ // data from the old header file
+ // - the wrong object file is stored in the cache.
- // The same >= logic as above applies to the change time of the file.
- if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_CTIME)
- && st.ctime() >= ctx.time_of_compilation) {
- LOG("Include file {} ctime too new", path);
return false;
}
// Let's hash the include file content.
Hash fhash;
- is_pch = Util::is_precompiled_header(path);
if (is_pch) {
if (ctx.included_pch_file.empty()) {
LOG("Detected use of precompiled header: {}", path);
@@ -419,20 +502,28 @@ do_remember_include_file(Context& ctx,
return true;
}
+enum class RememberIncludeFileResult { ok, cannot_use_pch };
+
// This function hashes an include file and stores the path and hash in
// ctx.included_files. If the include file is a PCH, cpp_hash is also updated.
-static void
+static RememberIncludeFileResult
remember_include_file(Context& ctx,
const std::string& path,
Hash& cpp_hash,
bool system,
Hash* depend_mode_hash)
{
- if (!do_remember_include_file(ctx, path, cpp_hash, system, depend_mode_hash)
- && ctx.config.direct_mode()) {
- LOG_RAW("Disabling direct mode");
- ctx.config.set_direct_mode(false);
+ if (!do_remember_include_file(
+ ctx, path, cpp_hash, system, depend_mode_hash)) {
+ if (Util::is_precompiled_header(path)) {
+ return RememberIncludeFileResult::cannot_use_pch;
+ } else if (ctx.config.direct_mode()) {
+ LOG_RAW("Disabling direct mode");
+ ctx.config.set_direct_mode(false);
+ }
}
+
+ return RememberIncludeFileResult::ok;
}
static void
@@ -449,7 +540,10 @@ print_included_files(const Context& ctx, FILE* fp)
// - Makes include file paths for which the base directory is a prefix relative
// when computing the hash sum.
// - Stores the paths and hashes of included files in ctx.included_files.
-static bool
+//
+// Returns Statistic::none on success, otherwise a statistics counter to be
+// incremented.
+static Statistic
process_preprocessed_file(Context& ctx,
Hash& hash,
const std::string& path,
@@ -459,7 +553,7 @@ process_preprocessed_file(Context& ctx,
try {
data = Util::read_file(path);
} catch (Error&) {
- return false;
+ return Statistic::internal_error;
}
// Bytes between p and q are pending to be hashed.
@@ -540,7 +634,7 @@ process_preprocessed_file(Context& ctx,
q++;
if (q >= end) {
LOG_RAW("Failed to parse included file path");
- return false;
+ return Statistic::internal_error;
}
// q points to the beginning of an include file path
hash.hash(p, q - p);
@@ -582,7 +676,10 @@ process_preprocessed_file(Context& ctx,
hash.hash(inc_path);
}
- remember_include_file(ctx, inc_path, hash, system, nullptr);
+ if (remember_include_file(ctx, inc_path, hash, system, nullptr)
+ == RememberIncludeFileResult::cannot_use_pch) {
+ return Statistic::could_not_use_precompiled_header;
+ }
p = q; // Everything of interest between p and q has been hashed now.
} else if (q[0] == '.' && q[1] == 'i' && q[2] == 'n' && q[3] == 'c'
&& q[4] == 'b' && q[5] == 'i' && q[6] == 'n') {
@@ -627,7 +724,7 @@ process_preprocessed_file(Context& ctx,
print_included_files(ctx, stdout);
}
- return true;
+ return Statistic::none;
}
// Extract the used includes from the dependency file. Note that we cannot
@@ -1080,7 +1177,12 @@ get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash)
TemporaryFile tmp_stdout(
FMT("{}/tmp.cpp_stdout", ctx.config.temporary_dir()));
- stdout_path = tmp_stdout.path;
+ ctx.register_pending_tmp_file(tmp_stdout.path);
+
+ // stdout_path needs the proper cpp_extension for the compiler to do its
+ // thing correctly.
+ stdout_path = FMT("{}.{}", tmp_stdout.path, ctx.config.cpp_extension());
+ Util::hard_link(tmp_stdout.path, stdout_path);
ctx.register_pending_tmp_file(stdout_path);
TemporaryFile tmp_stderr(
@@ -1115,9 +1217,11 @@ get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash)
}
hash.hash_delimiter("cpp");
- bool is_pump = ctx.config.compiler_type() == CompilerType::pump;
- if (!process_preprocessed_file(ctx, hash, stdout_path, is_pump)) {
- throw Failure(Statistic::internal_error);
+ const bool is_pump = ctx.config.compiler_type() == CompilerType::pump;
+ const Statistic error =
+ process_preprocessed_file(ctx, hash, stdout_path, is_pump);
+ if (error != Statistic::none) {
+ throw Failure(error);
}
hash.hash_delimiter("cppstderr");
@@ -1130,11 +1234,7 @@ get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash)
if (ctx.args_info.direct_i_file) {
ctx.i_tmpfile = ctx.args_info.input_file;
} else {
- // i_tmpfile needs the proper cpp_extension for the compiler to do its
- // thing correctly
- ctx.i_tmpfile = FMT("{}.{}", stdout_path, ctx.config.cpp_extension());
- Util::rename(stdout_path, ctx.i_tmpfile);
- ctx.register_pending_tmp_file(ctx.i_tmpfile);
+ ctx.i_tmpfile = stdout_path;
}
if (!ctx.config.run_second_cpp()) {
@@ -1275,7 +1375,7 @@ hash_common_info(const Context& ctx,
"COMPILER_PATH",
"GCC_COMPARE_DEBUG",
"GCC_EXEC_PREFIX",
- "SOURCE_DATE_EPOCH",
+ // Note: SOURCE_DATE_EPOCH is handled in hash_source_code_string().
};
for (const char* name : always_hash_env_vars) {
const char* value = getenv(name);
@@ -2170,8 +2270,8 @@ finalize_at_exit(Context& ctx)
// Dump log buffer last to not lose any logs.
if (ctx.config.debug() && !ctx.args_info.output_obj.empty()) {
- const auto path = FMT("{}.ccache-log", ctx.args_info.output_obj);
- Logging::dump_log(path);
+ Logging::dump_log(prepare_debug_path(
+ ctx.config.debug_dir(), ctx.args_info.output_obj, "log"));
}
}
@@ -2319,7 +2419,8 @@ do_cache_compilation(Context& ctx, const char* const* argv)
MTR_META_THREAD_NAME(ctx.args_info.output_obj.c_str());
if (ctx.config.debug()) {
- std::string path = FMT("{}.ccache-input-text", ctx.args_info.output_obj);
+ const auto path = prepare_debug_path(
+ ctx.config.debug_dir(), ctx.args_info.output_obj, "input-text");
File debug_text_file(path, "w");
if (debug_text_file) {
ctx.hash_debug_files.push_back(std::move(debug_text_file));
@@ -2333,8 +2434,7 @@ do_cache_compilation(Context& ctx, const char* const* argv)
: nullptr;
Hash common_hash;
- init_hash_debug(
- ctx, common_hash, ctx.args_info.output_obj, 'c', "COMMON", debug_text_file);
+ init_hash_debug(ctx, common_hash, 'c', "COMMON", debug_text_file);
MTR_BEGIN("hash", "common_hash");
hash_common_info(
@@ -2343,12 +2443,7 @@ do_cache_compilation(Context& ctx, const char* const* argv)
// Try to find the hash using the manifest.
Hash direct_hash = common_hash;
- init_hash_debug(ctx,
- direct_hash,
- ctx.args_info.output_obj,
- 'd',
- "DIRECT MODE",
- debug_text_file);
+ init_hash_debug(ctx, direct_hash, 'd', "DIRECT MODE", debug_text_file);
Args args_to_hash = processed.preprocessor_args;
args_to_hash.push_back(processed.extra_args_to_hash);
@@ -2392,12 +2487,7 @@ do_cache_compilation(Context& ctx, const char* const* argv)
// Find the hash using the preprocessed output. Also updates
// ctx.included_files.
Hash cpp_hash = common_hash;
- init_hash_debug(ctx,
- cpp_hash,
- ctx.args_info.output_obj,
- 'p',
- "PREPROCESSOR MODE",
- debug_text_file);
+ init_hash_debug(ctx, cpp_hash, 'p', "PREPROCESSOR MODE", debug_text_file);
MTR_BEGIN("hash", "cpp_hash");
result_name = calculate_result_name(
diff --git a/src/ccache.hpp b/src/ccache.hpp
index 7f833ee..bf34cb0 100644
--- a/src/ccache.hpp
+++ b/src/ccache.hpp
@@ -32,26 +32,6 @@ class Context;
extern const char CCACHE_VERSION[];
-const uint32_t SLOPPY_INCLUDE_FILE_MTIME = 1 << 0;
-const uint32_t SLOPPY_INCLUDE_FILE_CTIME = 1 << 1;
-const uint32_t SLOPPY_TIME_MACROS = 1 << 2;
-const uint32_t SLOPPY_PCH_DEFINES = 1 << 3;
-// Allow us to match files based on their stats (size, mtime, ctime), without
-// looking at their contents.
-const uint32_t SLOPPY_FILE_STAT_MATCHES = 1 << 4;
-// Allow us to not include any system headers in the manifest include files,
-// similar to -MM versus -M for dependencies.
-const uint32_t SLOPPY_SYSTEM_HEADERS = 1 << 5;
-// Allow us to ignore ctimes when comparing file stats, so we can fake mtimes
-// if we want to (it is much harder to fake ctimes, requires changing clock)
-const uint32_t SLOPPY_FILE_STAT_MATCHES_CTIME = 1 << 6;
-// Allow us to not include the -index-store-path option in the manifest hash.
-const uint32_t SLOPPY_CLANG_INDEX_STORE = 1 << 7;
-// Ignore locale settings.
-const uint32_t SLOPPY_LOCALE = 1 << 8;
-// Allow caching even if -fmodules is used.
-const uint32_t SLOPPY_MODULES = 1 << 9;
-
using FindExecutableFunction =
std::function<std::string(const Context& ctx,
const std::string& name,
diff --git a/src/cleanup.cpp b/src/cleanup.cpp
index 6d91d77..5c76ebb 100644
--- a/src/cleanup.cpp
+++ b/src/cleanup.cpp
@@ -23,6 +23,7 @@
#include "Config.hpp"
#include "Context.hpp"
#include "Logging.hpp"
+#include "Statistics.hpp"
#include "Util.hpp"
#ifdef INODE_CACHE_SUPPORTED
@@ -90,9 +91,8 @@ clean_up_dir(const std::string& subdir,
{
LOG("Cleaning up cache directory {}", subdir);
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir, [&](double progress) { progress_receiver(progress / 3); }, files);
+ std::vector<CacheFile> files = Util::get_level_1_files(
+ subdir, [&](double progress) { progress_receiver(progress / 3); });
uint64_t cache_size = 0;
uint64_t files_in_cache = 0;
@@ -102,29 +102,27 @@ clean_up_dir(const std::string& subdir,
++i, progress_receiver(1.0 / 3 + 1.0 * i / files.size() / 3)) {
const auto& file = files[i];
- if (!file->lstat().is_regular()) {
+ if (!file.lstat().is_regular()) {
// Not a file or missing file.
continue;
}
// Delete any tmp files older than 1 hour right away.
- if (file->lstat().mtime() + 3600 < current_time
- && Util::base_name(file->path()).find(".tmp.") != std::string::npos) {
- Util::unlink_tmp(file->path());
+ if (file.lstat().mtime() + 3600 < current_time
+ && Util::base_name(file.path()).find(".tmp.") != std::string::npos) {
+ Util::unlink_tmp(file.path());
continue;
}
- cache_size += file->lstat().size_on_disk();
+ cache_size += file.lstat().size_on_disk();
files_in_cache += 1;
}
// Sort according to modification time, oldest first.
- std::sort(files.begin(),
- files.end(),
- [](const std::shared_ptr<CacheFile>& f1,
- const std::shared_ptr<CacheFile>& f2) {
- return f1->lstat().mtime() < f2->lstat().mtime();
- });
+ std::sort(
+ files.begin(), files.end(), [](const CacheFile& f1, const CacheFile& f2) {
+ return f1.lstat().mtime() < f2.lstat().mtime();
+ });
LOG("Before cleanup: {:.0f} KiB, {:.0f} files",
static_cast<double>(cache_size) / 1024,
@@ -135,27 +133,26 @@ clean_up_dir(const std::string& subdir,
++i, progress_receiver(2.0 / 3 + 1.0 * i / files.size() / 3)) {
const auto& file = files[i];
- if (!file->lstat() || file->lstat().is_directory()) {
+ if (!file.lstat() || file.lstat().is_directory()) {
continue;
}
if ((max_size == 0 || cache_size <= max_size)
&& (max_files == 0 || files_in_cache <= max_files)
&& (max_age == 0
- || file->lstat().mtime()
+ || file.lstat().mtime()
> (current_time - static_cast<int64_t>(max_age)))) {
break;
}
- if (Util::ends_with(file->path(), ".stderr")) {
+ if (Util::ends_with(file.path(), ".stderr")) {
// In order to be nice to legacy ccache versions, make sure that the .o
// file is deleted before .stderr, because if the ccache process gets
// killed after deleting the .stderr but before deleting the .o, the
// cached result will be inconsistent. (.stderr is the only file that is
// optional for legacy ccache versions; any other file missing from the
// cache will be detected.)
- std::string o_file =
- file->path().substr(0, file->path().size() - 6) + "o";
+ std::string o_file = file.path().substr(0, file.path().size() - 6) + "o";
// Don't subtract this extra deletion from the cache size; that
// bookkeeping will be done when the loop reaches the .o file. If the
@@ -167,7 +164,7 @@ clean_up_dir(const std::string& subdir,
}
delete_file(
- file->path(), file->lstat().size_on_disk(), &cache_size, &files_in_cache);
+ file.path(), file.lstat().size_on_disk(), &cache_size, &files_in_cache);
cleaned = true;
}
@@ -207,12 +204,11 @@ wipe_dir(const std::string& subdir,
{
LOG("Clearing out cache directory {}", subdir);
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir, [&](double progress) { progress_receiver(progress / 2); }, files);
+ const std::vector<CacheFile> files = Util::get_level_1_files(
+ subdir, [&](double progress) { progress_receiver(progress / 2); });
for (size_t i = 0; i < files.size(); ++i) {
- Util::unlink_safe(files[i]->path());
+ Util::unlink_safe(files[i].path());
progress_receiver(0.5 + 0.5 * i / files.size());
}
diff --git a/src/compress.cpp b/src/compress.cpp
index 42e0179..1164b79 100644
--- a/src/compress.cpp
+++ b/src/compress.cpp
@@ -221,23 +221,20 @@ compress_stats(const Config& config,
config.cache_dir(),
[&](const std::string& subdir,
const Util::ProgressReceiver& sub_progress_receiver) {
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir,
- [&](double progress) { sub_progress_receiver(progress / 2); },
- files);
+ const std::vector<CacheFile> files = Util::get_level_1_files(
+ subdir, [&](double progress) { sub_progress_receiver(progress / 2); });
for (size_t i = 0; i < files.size(); ++i) {
const auto& cache_file = files[i];
- on_disk_size += cache_file->lstat().size_on_disk();
+ on_disk_size += cache_file.lstat().size_on_disk();
try {
- auto file = open_file(cache_file->path(), "rb");
- auto reader = create_reader(*cache_file, file.get());
- compr_size += cache_file->lstat().size();
+ auto file = open_file(cache_file.path(), "rb");
+ auto reader = create_reader(cache_file, file.get());
+ compr_size += cache_file.lstat().size();
content_size += reader->content_size();
} catch (Error&) {
- incompr_size += cache_file->lstat().size();
+ incompr_size += cache_file.lstat().size();
}
sub_progress_receiver(1.0 / 2 + 1.0 * i / files.size() / 2);
@@ -290,27 +287,26 @@ compress_recompress(Context& ctx,
ctx.config.cache_dir(),
[&](const std::string& subdir,
const Util::ProgressReceiver& sub_progress_receiver) {
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir,
- [&](double progress) { sub_progress_receiver(0.1 * progress); },
- files);
+ std::vector<CacheFile> files =
+ Util::get_level_1_files(subdir, [&](double progress) {
+ sub_progress_receiver(0.1 * progress);
+ });
auto stats_file = subdir + "/stats";
for (size_t i = 0; i < files.size(); ++i) {
const auto& file = files[i];
- if (file->type() != CacheFile::Type::unknown) {
+ if (file.type() != CacheFile::Type::unknown) {
thread_pool.enqueue([&statistics, stats_file, file, level] {
try {
- recompress_file(statistics, stats_file, *file, level);
+ recompress_file(statistics, stats_file, file, level);
} catch (Error&) {
// Ignore for now.
}
});
} else {
- statistics.update(0, 0, 0, file->lstat().size());
+ statistics.update(0, 0, 0, file.lstat().size());
}
sub_progress_receiver(0.1 + 0.9 * i / files.size());
diff --git a/src/exceptions.hpp b/src/exceptions.hpp
index 1c73141..f35f50c 100644
--- a/src/exceptions.hpp
+++ b/src/exceptions.hpp
@@ -21,7 +21,6 @@
#include "system.hpp"
#include "FormatNonstdStringView.hpp"
-#include "Statistics.hpp"
#include "third_party/fmt/core.h"
#include "third_party/nonstd/optional.hpp"
@@ -80,38 +79,3 @@ inline Fatal::Fatal(T&&... args)
: ErrorBase(fmt::format(std::forward<T>(args)...))
{
}
-
-// Throw a Failure if ccache did not succeed in getting or putting a result in
-// the cache. If `exit_code` is set, just exit with that code directly,
-// otherwise execute the real compiler and exit with its exit code. Also updates
-// statistics counter `statistic` if it's not `Statistic::none`.
-class Failure : public std::exception
-{
-public:
- Failure(Statistic statistic,
- nonstd::optional<int> exit_code = nonstd::nullopt);
-
- nonstd::optional<int> exit_code() const;
- Statistic statistic() const;
-
-private:
- Statistic m_statistic;
- nonstd::optional<int> m_exit_code;
-};
-
-inline Failure::Failure(Statistic statistic, nonstd::optional<int> exit_code)
- : m_statistic(statistic), m_exit_code(exit_code)
-{
-}
-
-inline nonstd::optional<int>
-Failure::exit_code() const
-{
- return m_exit_code;
-}
-
-inline Statistic
-Failure::statistic() const
-{
- return m_statistic;
-}
diff --git a/src/hashutil.cpp b/src/hashutil.cpp
index 072d821..7378c02 100644
--- a/src/hashutil.cpp
+++ b/src/hashutil.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -23,8 +23,8 @@
#include "Context.hpp"
#include "Hash.hpp"
#include "Logging.hpp"
+#include "Sloppiness.hpp"
#include "Stat.hpp"
-#include "ccache.hpp"
#include "execute.hpp"
#include "fmtmacros.hpp"
#include "macroskip.hpp"
@@ -245,6 +245,15 @@ hash_source_code_string(const Context& ctx,
hash.hash(now->tm_year);
hash.hash(now->tm_mon);
hash.hash(now->tm_mday);
+
+ // If the compiler has support for it, the expansion of __DATE__ will change
+ // according to the value of SOURCE_DATE_EPOCH. Note: We have to hash both
+ // SOURCE_DATE_EPOCH and the current date since we can't be sure that the
+ // compiler honors SOURCE_DATE_EPOCH.
+ const auto source_date_epoch = getenv("SOURCE_DATE_EPOCH");
+ if (source_date_epoch) {
+ hash.hash(source_date_epoch);
+ }
}
if (result & HASH_SOURCE_CODE_FOUND_TIME) {
// We don't know for sure that the program actually uses the __TIME__ macro,
@@ -254,6 +263,7 @@ hash_source_code_string(const Context& ctx,
// __TIME__ has been found so that the direct mode can be disabled.
LOG("Found __TIME__ in {}", path);
}
+
if (result & HASH_SOURCE_CODE_FOUND_TIMESTAMP) {
LOG("Found __TIMESTAMP__ in {}", path);
diff --git a/src/language.cpp b/src/language.cpp
index 70325ea..aa1a2ca 100644
--- a/src/language.cpp
+++ b/src/language.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2010-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2010-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -67,7 +67,7 @@ const struct
{".HXX", "c++-header"},
{".tcc", "c++-header"},
{".TCC", "c++-header"},
- {".cu", "cu"},
+ {".cu", "cu"}, // Special case in language_for_file: "cuda" for Clang
{".hip", "hip"},
{nullptr, nullptr},
};
@@ -84,7 +84,8 @@ const struct
{"c++", "c++-cpp-output"},
{"c++-cpp-output", "c++-cpp-output"},
{"c++-header", "c++-cpp-output"},
- {"cu", "cpp-output"},
+ {"cu", "cpp-output"}, // NVCC
+ {"cuda", "cpp-output"}, // Clang
{"hip", "cpp-output"},
{"objective-c", "objective-c-cpp-output"},
{"objective-c-header", "objective-c-cpp-output"},
@@ -101,10 +102,26 @@ const struct
} // namespace
+bool
+supported_source_extension(const std::string& fname)
+{
+ const auto ext = Util::get_extension(fname);
+ for (size_t i = 0; k_ext_lang_table[i].extension; ++i) {
+ if (k_ext_lang_table[i].extension == ext) {
+ return true;
+ }
+ }
+ return false;
+}
+
std::string
-language_for_file(const std::string& fname)
+language_for_file(const std::string& fname, CompilerType compiler_type)
{
auto ext = Util::get_extension(fname);
+ if (ext == ".cu" && compiler_type == CompilerType::clang) {
+ // Special case: Clang maps .cu to cuda.
+ return "cuda";
+ }
for (size_t i = 0; k_ext_lang_table[i].extension; ++i) {
if (k_ext_lang_table[i].extension == ext) {
return k_ext_lang_table[i].language;
diff --git a/src/language.hpp b/src/language.hpp
index 69f7f26..99bf386 100644
--- a/src/language.hpp
+++ b/src/language.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2010-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2010-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -20,11 +20,17 @@
#include "system.hpp"
+#include "Config.hpp"
+
#include <string>
-// Guess the language of `fname` based on its extension. Returns the empty
-// string if the extension is unknown.
-std::string language_for_file(const std::string& fname);
+// Return whether a filename has a supported source code extension.
+bool supported_source_extension(const std::string& fname);
+
+// Guess the language of `fname` based on its extension and a compiler type.
+// Returns the empty string if the extension is unknown.
+std::string language_for_file(const std::string& fname,
+ CompilerType compiler_type);
// Return the preprocessed language for `language`, or the empty string if
// unknown.
diff --git a/src/system.hpp b/src/system.hpp
index 79d07ef..ae4ca52 100644
--- a/src/system.hpp
+++ b/src/system.hpp
@@ -139,7 +139,6 @@ const mode_t S_IWUSR = mode_t(_S_IWRITE);
# define NOMINMAX 1
# include <windows.h>
# define mkdir(a, b) _mkdir(a)
-# define link(src, dst) (CreateHardLink(dst, src, nullptr) ? 0 : -1)
# define execv(a, b) win32execute(a, b, 0, -1, -1)
# define strncasecmp _strnicmp
# define strcasecmp _stricmp
@@ -172,7 +171,7 @@ DLLIMPORT extern char** environ;
# define O_BINARY 0
#endif
-#ifdef HAVE_SYS_MMAN_H
+#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_PTHREAD_MUTEXATTR_SETPSHARED)
# define INODE_CACHE_SUPPORTED
#endif
diff --git a/src/third_party/CMakeLists.txt b/src/third_party/CMakeLists.txt
index d40110a..ed0ff9e 100644
--- a/src/third_party/CMakeLists.txt
+++ b/src/third_party/CMakeLists.txt
@@ -6,6 +6,10 @@ else()
target_compile_definitions(third_party_lib PUBLIC -DSTATIC_GETOPT)
endif()
+if(WIN32)
+ target_sources(third_party_lib PRIVATE win32/mktemp.c)
+endif ()
+
if(ENABLE_TRACING)
target_sources(third_party_lib PRIVATE minitrace.c)
endif()
@@ -36,7 +40,7 @@ endif()
# Treat third party headers as system files (no warning for those headers).
target_include_directories(
third_party_lib
- PRIVATE ${CMAKE_BINARY_DIR} . SYSTEM)
+ PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} SYSTEM)
target_link_libraries(third_party_lib PRIVATE standard_settings)
target_link_libraries(third_party_lib INTERFACE blake3)
diff --git a/src/third_party/blake3/CMakeLists.txt b/src/third_party/blake3/CMakeLists.txt
index a75e561..581ee81 100644
--- a/src/third_party/blake3/CMakeLists.txt
+++ b/src/third_party/blake3/CMakeLists.txt
@@ -2,50 +2,116 @@ add_library(blake3 STATIC blake3.c blake3_dispatch_ccache.c blake3_portable.c)
target_link_libraries(blake3 PRIVATE standard_settings)
-if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SIZEOF_VOID_P EQUAL 8
- AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
- AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0))
- set(blake_source_type asm)
- set(blake_suffix "_x86-64_unix.S")
-else()
- set(blake_source_type c)
- set(blake_suffix ".c")
+if(MSVC)
+ # No object file is created if masm is passed the compile options from standard_settings,
+ # so don't pass any flags at all to assembler (as no flags are needed anyway).
+ string(REPLACE "<FLAGS> " "" CMAKE_ASM_MASM_COMPILE_OBJECT "${CMAKE_ASM_MASM_COMPILE_OBJECT}")
endif()
-include(CheckAsmCompilerFlag)
-include(CheckCCompilerFlag)
-
-function(add_source_if_enabled feature compile_flags)
- string(TOUPPER "have_${blake_source_type}_${feature}" have_feature)
-
- # AVX512 support fails to compile with old Apple Clang versions even though
- # the compiler accepts the -m flags.
- if(${feature} STREQUAL "avx512"
- AND CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
- AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
- message(STATUS "Detected unsupported compiler for ${have_feature} - disabled")
- set(${have_feature} FALSE)
- elseif(${blake_source_type} STREQUAL "asm")
- check_asm_compiler_flag(${compile_flags} ${have_feature})
+include(CheckCSourceCompiles)
+
+function(add_source_if_enabled feature msvc_flags others_flags intrinsic)
+ if(MSVC)
+ set(compile_flags "${msvc_flags}")
else()
- check_c_compiler_flag(${compile_flags} ${have_feature})
+ set(compile_flags "${others_flags}")
+ endif()
+
+ # First check if it's possible to use the assembler variant for the feature.
+ string(TOUPPER "have_asm_${feature}" have_feature)
+ if(NOT DEFINED "${have_feature}" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+ if(MSVC)
+ set(suffix "_x86-64_windows_msvc.asm")
+ elseif(WIN32)
+ set(suffix "_x86-64_windows_gnu.S")
+ else()
+ set(suffix "_x86-64_unix.S")
+ endif()
+
+ if(NOT CMAKE_REQUIRED_QUIET)
+ message(STATUS "Performing Test ${have_feature}")
+ endif()
+
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+ # Must set CMAKE_ASM_MASM_CREATE_STATIC_LIBRARY explicitly otherwise try_compile
+ # fails, see https://discourse.cmake.org/t/building-lib-file-from-asm-cmake-bug/1959
+ try_compile(
+ ${have_feature}
+ ${CMAKE_CURRENT_BINARY_DIR}
+ "${CMAKE_CURRENT_SOURCE_DIR}/blake3_${feature}${suffix}"
+ CMAKE_FLAGS -DCMAKE_ASM_MASM_CREATE_STATIC_LIBRARY=${CMAKE_C_CREATE_STATIC_LIBRARY}
+ COMPILE_DEFINITIONS ${compile_flags})
+
+ unset(CMAKE_TRY_COMPILE_TARGET_TYPE)
+
+ if(NOT CMAKE_REQUIRED_QUIET)
+ if (${${have_feature}})
+ message(STATUS "Performing Test ${have_feature} - Success")
+ else()
+ message(STATUS "Performing Test ${have_feature} - Failed")
+ endif()
+ endif()
+ endif()
+
+ # If the assembler variant didn't work, try the c variant.
+ if(NOT ${have_feature})
+ string(TOUPPER "have_c_${feature}" have_feature)
+ set(suffix ".c")
+
+ set(CMAKE_REQUIRED_FLAGS ${compile_flags})
+ check_c_source_compiles(
+ [=[
+ #include <immintrin.h>
+ int main() { ${intrinsic}; return 0; }
+ ]=]
+ ${have_feature})
+ unset(CMAKE_REQUIRED_FLAGS)
endif()
if(${have_feature})
- target_sources(blake3 PRIVATE blake3_${feature}${blake_suffix})
- set_property(
- SOURCE blake3_${feature}${blake_suffix}
- APPEND PROPERTY COMPILE_FLAGS ${compile_flags})
+ target_sources(blake3 PRIVATE blake3_${feature}${suffix})
+ if(suffix STREQUAL ".c")
+ if(MINGW AND feature STREQUAL "avx512")
+ # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782.
+ # Taken from blake3's build.rs.
+ set(compile_flags "${compile_flags} -fno-asynchronous-unwind-tables")
+ endif()
+ set_property(
+ SOURCE blake3_${feature}${suffix}
+ APPEND PROPERTY COMPILE_FLAGS ${compile_flags})
+ elseif(NOT MSVC)
+ set_property(
+ SOURCE blake3_${feature}${suffix}
+ PROPERTY COMPILE_FLAGS ${compile_flags})
+ endif()
else()
string(TOUPPER "blake3_no_${feature}" no_feature)
target_compile_definitions(blake3 PRIVATE ${no_feature})
endif()
endfunction()
-add_source_if_enabled(sse2 "-msse2")
-add_source_if_enabled(sse41 "-msse4.1")
-add_source_if_enabled(avx2 "-mavx2")
-add_source_if_enabled(avx512 "-mavx512f -mavx512vl")
+# https://software.intel.com/sites/landingpage/IntrinsicsGuide/
+add_source_if_enabled(sse2 "" "-msse2"
+ "_mm_set1_epi32(42)")
+add_source_if_enabled(sse41 "" "-msse4.1"
+ "_mm_test_all_ones(_mm_set1_epi32(42))")
+add_source_if_enabled(avx2 "/arch:AVX2" "-mavx2"
+ "_mm256_abs_epi8(_mm256_set1_epi32(42))")
+add_source_if_enabled(avx512 "/arch:AVX512" "-mavx512f -mavx512vl"
+ "_mm256_abs_epi64(_mm256_set1_epi32(42))")
-# TODO: how to detect ARM NEON support?
-# If NEON, define BLAKE3_USE_NEON and build blake3_neon.c
+# Neon is always available on AArch64
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ # https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics
+ check_c_source_compiles(
+ [=[
+ #include <arm_neon.h>
+ int main() { vdupq_n_s32(42); return 0; }
+ ]=]
+ HAVE_NEON)
+ if(HAVE_NEON)
+ target_sources(blake3 PRIVATE blake3_neon.c)
+ target_compile_definitions(blake3 PRIVATE BLAKE3_USE_NEON)
+ endif()
+endif()
diff --git a/src/third_party/blake3/blake3.c b/src/third_party/blake3/blake3.c
index 741a76d..7abf532 100644
--- a/src/third_party/blake3/blake3.c
+++ b/src/third_party/blake3/blake3.c
@@ -5,6 +5,10 @@
#include "blake3.h"
#include "blake3_impl.h"
+const char * blake3_version(void) {
+ return BLAKE3_VERSION_STRING;
+}
+
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
uint8_t flags) {
memcpy(self->cv, key, BLAKE3_KEY_LEN);
diff --git a/src/third_party/blake3/blake3.h b/src/third_party/blake3/blake3.h
index 51f1d2a..57ebd5a 100644
--- a/src/third_party/blake3/blake3.h
+++ b/src/third_party/blake3/blake3.h
@@ -8,6 +8,7 @@
extern "C" {
#endif
+#define BLAKE3_VERSION_STRING "0.3.7"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
@@ -38,6 +39,7 @@ typedef struct {
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;
+const char * blake3_version(void);
void blake3_hasher_init(blake3_hasher *self);
void blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]);
diff --git a/src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm
new file mode 100644
index 0000000..352298e
--- /dev/null
+++ b/src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm
@@ -0,0 +1,1828 @@
+public _blake3_hash_many_avx2
+public blake3_hash_many_avx2
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_avx2 PROC
+_blake3_hash_many_avx2 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 880
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ vmovdqa xmmword ptr [rsp+2D0H], xmm6
+ vmovdqa xmmword ptr [rsp+2E0H], xmm7
+ vmovdqa xmmword ptr [rsp+2F0H], xmm8
+ vmovdqa xmmword ptr [rsp+300H], xmm9
+ vmovdqa xmmword ptr [rsp+310H], xmm10
+ vmovdqa xmmword ptr [rsp+320H], xmm11
+ vmovdqa xmmword ptr [rsp+330H], xmm12
+ vmovdqa xmmword ptr [rsp+340H], xmm13
+ vmovdqa xmmword ptr [rsp+350H], xmm14
+ vmovdqa xmmword ptr [rsp+360H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+260H], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0]
+ vpand ymm2, ymm0, ymmword ptr [ADD1]
+ vmovdqa ymmword ptr [rsp+2A0H], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+220H], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+240H], ymm3
+ shl rdx, 6
+ mov qword ptr [rsp+2C0H], rdx
+ cmp rsi, 8
+ jc final7blocks
+outerloop8:
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+4H]
+ vpbroadcastd ymm2, dword ptr [rcx+8H]
+ vpbroadcastd ymm3, dword ptr [rcx+0CH]
+ vpbroadcastd ymm4, dword ptr [rcx+10H]
+ vpbroadcastd ymm5, dword ptr [rcx+14H]
+ vpbroadcastd ymm6, dword ptr [rcx+18H]
+ vpbroadcastd ymm7, dword ptr [rcx+1CH]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+20H]
+ mov r13, qword ptr [rdi+28H]
+ mov r14, qword ptr [rdi+30H]
+ mov r15, qword ptr [rdi+38H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+ALIGN 16
+innerloop8:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+2C0H]
+ cmove eax, ebx
+ mov dword ptr [rsp+200H], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-40H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-40H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+20H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+40H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+60H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-30H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+80H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0A0H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0C0H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0E0H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-20H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-20H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+100H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+120H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+140H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+160H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-10H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-10H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+180H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+1A0H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+1C0H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+1E0H], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+200H]
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+80H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+220H]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+240H]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+20H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+100H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+180H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+120H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+40H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+20H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+120H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+160H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+60H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+80H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+40H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+160H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+140H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+60H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+80H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+100H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+180H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+140H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+40H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+20H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+120H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+100H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+180H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+40H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+60H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+160H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+20H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+120H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+60H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+140H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+80H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop8
+ mov rbx, qword ptr [rbp+90H]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0CCH
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0CCH
+ vblendps ymm3, ymm12, ymm9, 0CCH
+ vperm2f128 ymm12, ymm1, ymm2, 20H
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0CCH
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 20H
+ vmovups ymmword ptr [rbx+20H], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0CCH
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0CCH
+ vblendps ymm14, ymm14, ymm13, 0CCH
+ vperm2f128 ymm8, ymm10, ymm14, 20H
+ vmovups ymmword ptr [rbx+40H], ymm8
+ vblendps ymm15, ymm13, ymm15, 0CCH
+ vperm2f128 ymm13, ymm6, ymm15, 20H
+ vmovups ymmword ptr [rbx+60H], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 31H
+ vperm2f128 ymm11, ymm3, ymm4, 31H
+ vmovups ymmword ptr [rbx+80H], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 31H
+ vperm2f128 ymm15, ymm6, ymm15, 31H
+ vmovups ymmword ptr [rbx+0A0H], ymm11
+ vmovups ymmword ptr [rbx+0C0H], ymm14
+ vmovups ymmword ptr [rbx+0E0H], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp+2A0H]
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+220H]
+ vmovdqa ymmword ptr [rsp+220H], ymm1
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK]
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK]
+ vpcmpgtd ymm2, ymm0, ymm2
+ vmovdqa ymm0, ymmword ptr [rsp+240H]
+ vpsubd ymm2, ymm0, ymm2
+ vmovdqa ymmword ptr [rsp+240H], ymm2
+ add rdi, 64
+ add rbx, 256
+ mov qword ptr [rbp+90H], rbx
+ sub rsi, 8
+ cmp rsi, 8
+ jnc outerloop8
+ test rsi, rsi
+ jnz final7blocks
+unwind:
+ vzeroupper
+ vmovdqa xmm6, xmmword ptr [rsp+2D0H]
+ vmovdqa xmm7, xmmword ptr [rsp+2E0H]
+ vmovdqa xmm8, xmmword ptr [rsp+2F0H]
+ vmovdqa xmm9, xmmword ptr [rsp+300H]
+ vmovdqa xmm10, xmmword ptr [rsp+310H]
+ vmovdqa xmm11, xmmword ptr [rsp+320H]
+ vmovdqa xmm12, xmmword ptr [rsp+330H]
+ vmovdqa xmm13, xmmword ptr [rsp+340H]
+ vmovdqa xmm14, xmmword ptr [rsp+350H]
+ vmovdqa xmm15, xmmword ptr [rsp+360H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final7blocks:
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, qword ptr [rsp+2C0H]
+ movzx r13d, byte ptr [rbp+78H]
+ movzx r12d, byte ptr [rbp+88H]
+ test rsi, 4H
+ je final3blocks
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovdqa ymm8, ymm0
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+220H]
+ vbroadcasti128 ymm13, xmmword ptr [rsp+240H]
+ vpunpckldq ymm14, ymm12, ymm13
+ vpunpckhdq ymm15, ymm12, ymm13
+ vpermq ymm14, ymm14, 50H
+ vpermq ymm15, ymm15, 50H
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN]
+ vpblendd ymm14, ymm14, ymm12, 44H
+ vpblendd ymm15, ymm15, ymm12, 44H
+ vmovdqa ymmword ptr [rsp], ymm14
+ vmovdqa ymmword ptr [rsp+20H], ymm15
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+200H], eax
+ vmovups ymm2, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm3, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm2, ymm3, 136
+ vshufps ymm5, ymm2, ymm3, 221
+ vmovups ymm2, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm3, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm2, ymm3, 136
+ vshufps ymm7, ymm2, ymm3, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ vmovups ymm10, ymmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-40H], 01H
+ vmovups ymm11, ymmword ptr [r10+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-30H], 01H
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups ymm10, ymmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-20H], 01H
+ vmovups ymm11, ymmword ptr [r10+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-10H], 01H
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 93H
+ vpshufd ymm15, ymm15, 93H
+ vpbroadcastd ymm2, dword ptr [rsp+200H]
+ vmovdqa ymm3, ymmword ptr [rsp]
+ vmovdqa ymm11, ymmword ptr [rsp+20H]
+ vpblendd ymm3, ymm3, ymm2, 88H
+ vpblendd ymm11, ymm11, ymm2, 88H
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vmovdqa ymm10, ymm2
+ mov al, 7
+roundloop4:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+40H], ymm4
+ nop
+ vmovdqa ymmword ptr [rsp+60H], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+80H], ymm5
+ vmovdqa ymmword ptr [rsp+0A0H], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm8, ymm8, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm11, ymm11, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpshufd ymm10, ymm10, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm8, ymm8, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm11, ymm11, 4EH
+ vpshufd ymm2, ymm2, 93H
+ vpshufd ymm10, ymm10, 93H
+ dec al
+ je endroundloop4
+ vmovdqa ymm4, ymmword ptr [rsp+40H]
+ vmovdqa ymm5, ymmword ptr [rsp+80H]
+ vshufps ymm12, ymm4, ymm5, 214
+ vpshufd ymm13, ymm4, 0FH
+ vpshufd ymm4, ymm12, 39H
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0AAH
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 88H
+ vpshufd ymm12, ymm12, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymmword ptr [rsp+40H], ymm13
+ vmovdqa ymmword ptr [rsp+80H], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+60H]
+ vmovdqa ymm13, ymmword ptr [rsp+0A0H]
+ vshufps ymm5, ymm12, ymm13, 214
+ vpshufd ymm6, ymm12, 0FH
+ vpshufd ymm12, ymm5, 39H
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0AAH
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 88H
+ vpshufd ymm5, ymm5, 78H
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 1EH
+ vmovdqa ymm13, ymm6
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+40H]
+ vmovdqa ymm6, ymmword ptr [rsp+80H]
+ jmp roundloop4
+endroundloop4:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop4
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovdqu xmmword ptr [rbx+40H], xmm8
+ vmovdqu xmmword ptr [rbx+50H], xmm9
+ vextracti128 xmmword ptr [rbx+60H], ymm8, 01H
+ vextracti128 xmmword ptr [rbx+70H], ymm9, 01H
+ vmovaps xmm8, xmmword ptr [rsp+260H]
+ vmovaps xmm0, xmmword ptr [rsp+220H]
+ vmovaps xmm1, xmmword ptr [rsp+230H]
+ vmovaps xmm2, xmmword ptr [rsp+240H]
+ vmovaps xmm3, xmmword ptr [rsp+250H]
+ vblendvps xmm0, xmm0, xmm1, xmm8
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+220H], xmm0
+ vmovaps xmmword ptr [rsp+240H], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+final3blocks:
+ test rsi, 2H
+ je final1blocks
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovd xmm13, dword ptr [rsp+220H]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+240H], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovd xmm14, dword ptr [rsp+224H]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+244H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vinserti128 ymm13, ymm13, xmm14, 01H
+ vbroadcasti128 ymm14, xmmword ptr [ROT16]
+ vbroadcasti128 ymm15, xmmword ptr [ROT8]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+200H], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vpbroadcastd ymm8, dword ptr [rsp+200H]
+ vpblendd ymm3, ymm13, ymm8, 88H
+ vmovups ymm8, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ mov al, 7
+roundloop2:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 93H
+ dec al
+ jz endroundloop2
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0FH
+ vpshufd ymm4, ymm8, 39H
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0AAH
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 88H
+ vpshufd ymm8, ymm8, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp roundloop2
+endroundloop2:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovaps ymm8, ymmword ptr [rsp+260H]
+ vmovaps ymm0, ymmword ptr [rsp+220H]
+ vmovups ymm1, ymmword ptr [rsp+228H]
+ vmovaps ymm2, ymmword ptr [rsp+240H]
+ vmovups ymm3, ymmword ptr [rsp+248H]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+220H], ymm0
+ vmovaps ymmword ptr [rsp+240H], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+final1blocks:
+ test rsi, 1H
+ je unwind
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ vmovd xmm3, dword ptr [rsp+220H]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+240H], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovdqa xmm14, xmmword ptr [ROT16]
+ vmovdqa xmm15, xmmword ptr [ROT8]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vmovups xmm9, xmmword ptr [r8+rdx-30H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vmovups xmm9, xmmword ptr [r8+rdx-10H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+roundloop1:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+
+_blake3_hash_many_avx2 ENDP
+blake3_hash_many_avx2 ENDP
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+ADD0:
+ dd 0, 1, 2, 3, 4, 5, 6, 7
+
+ADD1:
+ dd 8 dup (8)
+
+BLAKE3_IV_0:
+ dd 8 dup (6A09E667H)
+
+BLAKE3_IV_1:
+ dd 8 dup (0BB67AE85H)
+
+BLAKE3_IV_2:
+ dd 8 dup (3C6EF372H)
+
+BLAKE3_IV_3:
+ dd 8 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN:
+ dd 8 dup (64)
+
+ROT16:
+ db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+
+ROT8:
+ db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+
+CMP_MSB_MASK:
+ dd 8 dup(80000000H)
+
+BLAKE3_IV:
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+_RDATA ENDS
+END
diff --git a/src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm
new file mode 100644
index 0000000..97a7268
--- /dev/null
+++ b/src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm
@@ -0,0 +1,2634 @@
+public _blake3_hash_many_avx512
+public blake3_hash_many_avx512
+public blake3_compress_in_place_avx512
+public _blake3_compress_in_place_avx512
+public blake3_compress_xof_avx512
+public _blake3_compress_xof_avx512
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_avx512 PROC
+_blake3_hash_many_avx512 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rdi
+ push rsi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 304
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ vmovdqa xmmword ptr [rsp+90H], xmm6
+ vmovdqa xmmword ptr [rsp+0A0H], xmm7
+ vmovdqa xmmword ptr [rsp+0B0H], xmm8
+ vmovdqa xmmword ptr [rsp+0C0H], xmm9
+ vmovdqa xmmword ptr [rsp+0D0H], xmm10
+ vmovdqa xmmword ptr [rsp+0E0H], xmm11
+ vmovdqa xmmword ptr [rsp+0F0H], xmm12
+ vmovdqa xmmword ptr [rsp+100H], xmm13
+ vmovdqa xmmword ptr [rsp+110H], xmm14
+ vmovdqa xmmword ptr [rsp+120H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9
+ kmovw k1, r9d
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32]
+ vpcmpud k2, ymm2, ymm0, 1
+ vpcmpud k3, ymm3, ymm0, 1
+ ; XXX: ml64.exe does not currently understand the syntax. We use a workaround.
+ vpbroadcastd ymm6, dword ptr [ADD1]
+ vpaddd ymm4 {k2}, ymm4, ymm6
+ vpaddd ymm5 {k3}, ymm5, ymm6
+ ; vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1] {1to8}
+ ; vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1] {1to8}
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+20H], ymm3
+ vmovdqa ymmword ptr [rsp+40H], ymm4
+ vmovdqa ymmword ptr [rsp+60H], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+80H], rdx
+ cmp rsi, 16
+ jc final15blocks
+outerloop16:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+1H*4H]
+ vpbroadcastd zmm2, dword ptr [rcx+2H*4H]
+ vpbroadcastd zmm3, dword ptr [rcx+3H*4H]
+ vpbroadcastd zmm4, dword ptr [rcx+4H*4H]
+ vpbroadcastd zmm5, dword ptr [rcx+5H*4H]
+ vpbroadcastd zmm6, dword ptr [rcx+6H*4H]
+ vpbroadcastd zmm7, dword ptr [rcx+7H*4H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+ALIGN 16
+innerloop16:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+80H]
+ cmove eax, ebx
+ mov dword ptr [rsp+88H], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+40H]
+ mov r13, qword ptr [rdi+48H]
+ mov r14, qword ptr [rdi+50H]
+ mov r15, qword ptr [rdi+58H]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+20H]
+ mov r9, qword ptr [rdi+28H]
+ mov r10, qword ptr [rdi+30H]
+ mov r11, qword ptr [rdi+38H]
+ mov r12, qword ptr [rdi+60H]
+ mov r13, qword ptr [rdi+68H]
+ mov r14, qword ptr [rdi+70H]
+ mov r15, qword ptr [rdi+78H]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+40H]
+ mov r13, qword ptr [rdi+48H]
+ mov r14, qword ptr [rdi+50H]
+ mov r15, qword ptr [rdi+58H]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ mov r8, qword ptr [rdi+20H]
+ mov r9, qword ptr [rdi+28H]
+ mov r10, qword ptr [rdi+30H]
+ mov r11, qword ptr [rdi+38H]
+ mov r12, qword ptr [rdi+60H]
+ mov r13, qword ptr [rdi+68H]
+ mov r14, qword ptr [rdi+70H]
+ mov r15, qword ptr [rdi+78H]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+1H*40H]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN]
+ vpbroadcastd zmm15, dword ptr [rsp+22H*4H]
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop16
+ mov rbx, qword ptr [rbp+90H]
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 88H
+ vshufi32x4 zmm17, zmm1, zmm5, 88H
+ vshufi32x4 zmm18, zmm2, zmm6, 88H
+ vshufi32x4 zmm19, zmm3, zmm7, 88H
+ vshufi32x4 zmm20, zmm0, zmm4, 0DDH
+ vshufi32x4 zmm21, zmm1, zmm5, 0DDH
+ vshufi32x4 zmm22, zmm2, zmm6, 0DDH
+ vshufi32x4 zmm23, zmm3, zmm7, 0DDH
+ vshufi32x4 zmm0, zmm16, zmm17, 88H
+ vshufi32x4 zmm1, zmm18, zmm19, 88H
+ vshufi32x4 zmm2, zmm20, zmm21, 88H
+ vshufi32x4 zmm3, zmm22, zmm23, 88H
+ vshufi32x4 zmm4, zmm16, zmm17, 0DDH
+ vshufi32x4 zmm5, zmm18, zmm19, 0DDH
+ vshufi32x4 zmm6, zmm20, zmm21, 0DDH
+ vshufi32x4 zmm7, zmm22, zmm23, 0DDH
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+1H*40H], zmm1
+ vmovdqu32 zmmword ptr [rbx+2H*40H], zmm2
+ vmovdqu32 zmmword ptr [rbx+3H*40H], zmm3
+ vmovdqu32 zmmword ptr [rbx+4H*40H], zmm4
+ vmovdqu32 zmmword ptr [rbx+5H*40H], zmm5
+ vmovdqu32 zmmword ptr [rbx+6H*40H], zmm6
+ vmovdqu32 zmmword ptr [rbx+7H*40H], zmm7
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+1H*40H]
+ vmovdqa32 zmm2, zmm0
+ ; XXX: ml64.exe does not currently understand the syntax. We use a workaround.
+ vpbroadcastd zmm4, dword ptr [ADD16]
+ vpbroadcastd zmm5, dword ptr [ADD1]
+ vpaddd zmm2{k1}, zmm0, zmm4
+ ; vpaddd zmm2{k1}, zmm0, dword ptr [ADD16] ; {1to16}
+ vpcmpud k2, zmm2, zmm0, 1
+ vpaddd zmm1 {k2}, zmm1, zmm5
+ ; vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1] ; {1to16}
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+1H*40H], zmm1
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+90H], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc outerloop16
+ test rsi, rsi
+ jne final15blocks
+unwind:
+ vzeroupper
+ vmovdqa xmm6, xmmword ptr [rsp+90H]
+ vmovdqa xmm7, xmmword ptr [rsp+0A0H]
+ vmovdqa xmm8, xmmword ptr [rsp+0B0H]
+ vmovdqa xmm9, xmmword ptr [rsp+0C0H]
+ vmovdqa xmm10, xmmword ptr [rsp+0D0H]
+ vmovdqa xmm11, xmmword ptr [rsp+0E0H]
+ vmovdqa xmm12, xmmword ptr [rsp+0F0H]
+ vmovdqa xmm13, xmmword ptr [rsp+100H]
+ vmovdqa xmm14, xmmword ptr [rsp+110H]
+ vmovdqa xmm15, xmmword ptr [rsp+120H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rsi
+ pop rdi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final15blocks:
+ test esi, 8H
+ je final7blocks
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+4H]
+ vpbroadcastd ymm2, dword ptr [rcx+8H]
+ vpbroadcastd ymm3, dword ptr [rcx+0CH]
+ vpbroadcastd ymm4, dword ptr [rcx+10H]
+ vpbroadcastd ymm5, dword ptr [rcx+14H]
+ vpbroadcastd ymm6, dword ptr [rcx+18H]
+ vpbroadcastd ymm7, dword ptr [rcx+1CH]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+20H]
+ mov r13, qword ptr [rdi+28H]
+ mov r14, qword ptr [rdi+30H]
+ mov r15, qword ptr [rdi+38H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+innerloop8:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+80H]
+ cmove eax, ebx
+ mov dword ptr [rsp+88H], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-40H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-40H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-30H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-20H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-20H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-10H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-10H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0]
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3]
+ vmovdqa ymm12, ymmword ptr [rsp]
+ vmovdqa ymm13, ymmword ptr [rsp+40H]
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN]
+ vpbroadcastd ymm15, dword ptr [rsp+88H]
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop8
+ mov rbx, qword ptr [rbp+90H]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0CCH
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0CCH
+ vblendps ymm3, ymm12, ymm9, 0CCH
+ vperm2f128 ymm12, ymm1, ymm2, 20H
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0CCH
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 20H
+ vmovups ymmword ptr [rbx+20H], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0CCH
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0CCH
+ vblendps ymm14, ymm14, ymm13, 0CCH
+ vperm2f128 ymm8, ymm10, ymm14, 20H
+ vmovups ymmword ptr [rbx+40H], ymm8
+ vblendps ymm15, ymm13, ymm15, 0CCH
+ vperm2f128 ymm13, ymm6, ymm15, 20H
+ vmovups ymmword ptr [rbx+60H], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 31H
+ vperm2f128 ymm11, ymm3, ymm4, 31H
+ vmovups ymmword ptr [rbx+80H], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 31H
+ vperm2f128 ymm15, ymm6, ymm15, 31H
+ vmovups ymmword ptr [rbx+0A0H], ymm11
+ vmovups ymmword ptr [rbx+0C0H], ymm14
+ vmovups ymmword ptr [rbx+0E0H], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp]
+ vmovdqa ymm2, ymmword ptr [rsp+40H]
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+1H*20H]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+3H*20H]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+40H], ymm2
+ add rbx, 256
+ mov qword ptr [rbp+90H], rbx
+ add rdi, 64
+ sub rsi, 8
+final7blocks:
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, qword ptr [rsp+80H]
+ movzx r13, byte ptr [rbp+78H]
+ movzx r12, byte ptr [rbp+88H]
+ test esi, 4H
+ je final3blocks
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx]
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+1H*10H]
+ vmovdqa xmm12, xmmword ptr [rsp]
+ vmovdqa xmm13, xmmword ptr [rsp+40H]
+ vpunpckldq xmm14, xmm12, xmm13
+ vpunpckhdq xmm15, xmm12, xmm13
+ vpermq ymm14, ymm14, 0DCH
+ vpermq ymm15, ymm15, 0DCH
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
+ vinserti64x4 zmm13, zmm14, ymm15, 01H
+ mov eax, 17476
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov eax, 43690
+ kmovw k3, eax
+ mov eax, 34952
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+88H], eax
+ vmovdqa32 zmm2, zmm15
+ vpbroadcastd zmm8, dword ptr [rsp+22H*4H]
+ vpblendmd zmm3 {k4}, zmm13, zmm8
+ vmovups zmm8, zmmword ptr [r8+rdx-1H*40H]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-4H*10H], 01H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-4H*10H], 02H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-4H*10H], 03H
+ vmovups zmm9, zmmword ptr [r8+rdx-30H]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-3H*10H], 01H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-3H*10H], 02H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-3H*10H], 03H
+ vshufps zmm4, zmm8, zmm9, 136
+ vshufps zmm5, zmm8, zmm9, 221
+ vmovups zmm8, zmmword ptr [r8+rdx-20H]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-2H*10H], 01H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-2H*10H], 02H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-2H*10H], 03H
+ vmovups zmm9, zmmword ptr [r8+rdx-10H]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-1H*10H], 01H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-1H*10H], 02H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-1H*10H], 03H
+ vshufps zmm6, zmm8, zmm9, 136
+ vshufps zmm7, zmm8, zmm9, 221
+ vpshufd zmm6, zmm6, 93H
+ vpshufd zmm7, zmm7, 93H
+ mov al, 7
+roundloop4:
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 93H
+ vpshufd zmm3, zmm3, 4EH
+ vpshufd zmm2, zmm2, 39H
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 39H
+ vpshufd zmm3, zmm3, 4EH
+ vpshufd zmm2, zmm2, 93H
+ dec al
+ jz endroundloop4
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0FH
+ vpshufd zmm4, zmm8, 39H
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 78H
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 1EH
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp roundloop4
+endroundloop4:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop4
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vextracti32x4 xmmword ptr [rbx+4H*10H], zmm0, 02H
+ vextracti32x4 xmmword ptr [rbx+5H*10H], zmm1, 02H
+ vextracti32x4 xmmword ptr [rbx+6H*10H], zmm0, 03H
+ vextracti32x4 xmmword ptr [rbx+7H*10H], zmm1, 03H
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+40H]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+1H*10H]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+5H*10H]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+40H], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+final3blocks:
+ test esi, 2H
+ je final1block
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+40H], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovd xmm14, dword ptr [rsp+4H]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+44H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vinserti128 ymm13, ymm13, xmm14, 01H
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+88H], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vpbroadcastd ymm8, dword ptr [rsp+88H]
+ vpblendd ymm3, ymm13, ymm8, 88H
+ vmovups ymm8, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ mov al, 7
+roundloop2:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 93H
+ dec al
+ jz endroundloop2
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0FH
+ vpshufd ymm4, ymm8, 39H
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0AAH
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 88H
+ vpshufd ymm8, ymm8, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp roundloop2
+endroundloop2:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+40H]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+8H]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+48H]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+40H], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+final1block:
+ test esi, 1H
+ je unwind
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+40H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vmovups xmm9, xmmword ptr [r8+rdx-30H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vmovups xmm9, xmmword ptr [r8+rdx-10H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+roundloop1:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+
+_blake3_hash_many_avx512 ENDP
+blake3_hash_many_avx512 ENDP
+
+ALIGN 16
+blake3_compress_in_place_avx512 PROC
+_blake3_compress_in_place_avx512 PROC
+ sub rsp, 72
+ vmovdqa xmmword ptr [rsp], xmm6
+ vmovdqa xmmword ptr [rsp+10H], xmm7
+ vmovdqa xmmword ptr [rsp+20H], xmm8
+ vmovdqa xmmword ptr [rsp+30H], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ movzx eax, byte ptr [rsp+70H]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ vmovd xmm3, r9
+ vmovd xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV]
+ vmovups xmm8, xmmword ptr [rdx]
+ vmovups xmm9, xmmword ptr [rdx+10H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rdx+20H]
+ vmovups xmm9, xmmword ptr [rdx+30H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+@@:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp @B
+@@:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vmovdqu xmmword ptr [rcx], xmm0
+ vmovdqu xmmword ptr [rcx+10H], xmm1
+ vmovdqa xmm6, xmmword ptr [rsp]
+ vmovdqa xmm7, xmmword ptr [rsp+10H]
+ vmovdqa xmm8, xmmword ptr [rsp+20H]
+ vmovdqa xmm9, xmmword ptr [rsp+30H]
+ add rsp, 72
+ ret
+_blake3_compress_in_place_avx512 ENDP
+blake3_compress_in_place_avx512 ENDP
+
+ALIGN 16
+blake3_compress_xof_avx512 PROC
+_blake3_compress_xof_avx512 PROC
+ sub rsp, 72
+ vmovdqa xmmword ptr [rsp], xmm6
+ vmovdqa xmmword ptr [rsp+10H], xmm7
+ vmovdqa xmmword ptr [rsp+20H], xmm8
+ vmovdqa xmmword ptr [rsp+30H], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ movzx eax, byte ptr [rsp+70H]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+78H]
+ shl rax, 32
+ add r8, rax
+ vmovd xmm3, r9
+ vmovd xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV]
+ vmovups xmm8, xmmword ptr [rdx]
+ vmovups xmm9, xmmword ptr [rdx+10H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rdx+20H]
+ vmovups xmm9, xmmword ptr [rdx+30H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+@@:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp @B
+@@:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vpxor xmm2, xmm2, xmmword ptr [rcx]
+ vpxor xmm3, xmm3, xmmword ptr [rcx+10H]
+ vmovdqu xmmword ptr [r10], xmm0
+ vmovdqu xmmword ptr [r10+10H], xmm1
+ vmovdqu xmmword ptr [r10+20H], xmm2
+ vmovdqu xmmword ptr [r10+30H], xmm3
+ vmovdqa xmm6, xmmword ptr [rsp]
+ vmovdqa xmm7, xmmword ptr [rsp+10H]
+ vmovdqa xmm8, xmmword ptr [rsp+20H]
+ vmovdqa xmm9, xmmword ptr [rsp+30H]
+ add rsp, 72
+ ret
+_blake3_compress_xof_avx512 ENDP
+blake3_compress_xof_avx512 ENDP
+
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+INDEX0:
+ dd 0, 1, 2, 3, 16, 17, 18, 19
+ dd 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ dd 4, 5, 6, 7, 20, 21, 22, 23
+ dd 12, 13, 14, 15, 28, 29, 30, 31
+ADD0:
+ dd 0, 1, 2, 3, 4, 5, 6, 7
+ dd 8, 9, 10, 11, 12, 13, 14, 15
+ADD1:
+ dd 1
+ADD16:
+ dd 16
+BLAKE3_BLOCK_LEN:
+ dd 64
+ALIGN 64
+BLAKE3_IV:
+BLAKE3_IV_0:
+ dd 06A09E667H
+BLAKE3_IV_1:
+ dd 0BB67AE85H
+BLAKE3_IV_2:
+ dd 03C6EF372H
+BLAKE3_IV_3:
+ dd 0A54FF53AH
+
+_RDATA ENDS
+END
diff --git a/src/third_party/blake3/blake3_dispatch.c b/src/third_party/blake3/blake3_dispatch.c
index a4c0fa9..6518478 100644
--- a/src/third_party/blake3/blake3_dispatch.c
+++ b/src/third_party/blake3/blake3_dispatch.c
@@ -14,6 +14,8 @@
#endif
#endif
+#define MAYBE_UNUSED(x) (void)((x))
+
#if defined(IS_X86)
static uint64_t xgetbv() {
#if defined(_MSC_VER)
@@ -137,6 +139,7 @@ void blake3_compress_in_place(uint32_t cv[8],
uint8_t flags) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if (features & AVX512VL) {
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
@@ -165,6 +168,7 @@ void blake3_compress_xof(const uint32_t cv[8],
uint8_t out[64]) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if (features & AVX512VL) {
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
@@ -193,6 +197,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
@@ -242,6 +247,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
return 16;
diff --git a/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm
index 72deb7b..ff9bb4d 100644
--- a/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm
+++ b/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
movzx r8d, r8b
shl rax, 32
add r8, rax
- movq xmm3, r9
- movq xmm4, r8
+ movd xmm3, r9
+ movd xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
- movq xmm3, r9
- movq xmm4, r8
+ movd xmm3, r9
+ movd xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
diff --git a/src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm
new file mode 100644
index 0000000..8966c7b
--- /dev/null
+++ b/src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm
@@ -0,0 +1,2089 @@
+public _blake3_hash_many_sse41
+public blake3_hash_many_sse41
+public blake3_compress_in_place_sse41
+public _blake3_compress_in_place_sse41
+public blake3_compress_xof_sse41
+public _blake3_compress_xof_sse41
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_sse41 PROC
+_blake3_hash_many_sse41 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 528
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ movdqa xmmword ptr [rsp+170H], xmm6
+ movdqa xmmword ptr [rsp+180H], xmm7
+ movdqa xmmword ptr [rsp+190H], xmm8
+ movdqa xmmword ptr [rsp+1A0H], xmm9
+ movdqa xmmword ptr [rsp+1B0H], xmm10
+ movdqa xmmword ptr [rsp+1C0H], xmm11
+ movdqa xmmword ptr [rsp+1D0H], xmm12
+ movdqa xmmword ptr [rsp+1E0H], xmm13
+ movdqa xmmword ptr [rsp+1F0H], xmm14
+ movdqa xmmword ptr [rsp+200H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 00H
+ movdqa xmmword ptr [rsp+130H], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0]
+ pand xmm0, xmmword ptr [ADD1]
+ movdqa xmmword ptr [rsp+150H], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 00H
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+110H], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 00H
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+78H]
+ movzx r12d, byte ptr [rbp+88H]
+ cmp rsi, 4
+ jc final3blocks
+outerloop4:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 00H
+ pshufd xmm1, xmm3, 55H
+ pshufd xmm2, xmm3, 0AAH
+ pshufd xmm3, xmm3, 0FFH
+ movdqu xmm7, xmmword ptr [rcx+10H]
+ pshufd xmm4, xmm7, 00H
+ pshufd xmm5, xmm7, 55H
+ pshufd xmm6, xmm7, 0AAH
+ pshufd xmm7, xmm7, 0FFH
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-40H]
+ movdqu xmm9, xmmword ptr [r9+rdx-40H]
+ movdqu xmm10, xmmword ptr [r10+rdx-40H]
+ movdqu xmm11, xmmword ptr [r11+rdx-40H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+10H], xmm9
+ movdqa xmmword ptr [rsp+20H], xmm12
+ movdqa xmmword ptr [rsp+30H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-30H]
+ movdqu xmm9, xmmword ptr [r9+rdx-30H]
+ movdqu xmm10, xmmword ptr [r10+rdx-30H]
+ movdqu xmm11, xmmword ptr [r11+rdx-30H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+40H], xmm8
+ movdqa xmmword ptr [rsp+50H], xmm9
+ movdqa xmmword ptr [rsp+60H], xmm12
+ movdqa xmmword ptr [rsp+70H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-20H]
+ movdqu xmm9, xmmword ptr [r9+rdx-20H]
+ movdqu xmm10, xmmword ptr [r10+rdx-20H]
+ movdqu xmm11, xmmword ptr [r11+rdx-20H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+80H], xmm8
+ movdqa xmmword ptr [rsp+90H], xmm9
+ movdqa xmmword ptr [rsp+0A0H], xmm12
+ movdqa xmmword ptr [rsp+0B0H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-10H]
+ movdqu xmm9, xmmword ptr [r9+rdx-10H]
+ movdqu xmm10, xmmword ptr [r10+rdx-10H]
+ movdqu xmm11, xmmword ptr [r11+rdx-10H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0C0H], xmm8
+ movdqa xmmword ptr [rsp+0D0H], xmm9
+ movdqa xmmword ptr [rsp+0E0H], xmm12
+ movdqa xmmword ptr [rsp+0F0H], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3]
+ movdqa xmm12, xmmword ptr [rsp+110H]
+ movdqa xmm13, xmmword ptr [rsp+120H]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 00H
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+80H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+70H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0B0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+50H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0C0H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0A0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+60H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0F0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne innerloop4
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+20H], xmm1
+ movdqu xmmword ptr [rbx+40H], xmm9
+ movdqu xmmword ptr [rbx+60H], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+10H], xmm4
+ movdqu xmmword ptr [rbx+30H], xmm5
+ movdqu xmmword ptr [rbx+50H], xmm9
+ movdqu xmmword ptr [rbx+70H], xmm7
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+150H]
+ movdqa xmmword ptr [rsp+110H], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+120H]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+120H], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc outerloop4
+ test rsi, rsi
+ jne final3blocks
+unwind:
+ movdqa xmm6, xmmword ptr [rsp+170H]
+ movdqa xmm7, xmmword ptr [rsp+180H]
+ movdqa xmm8, xmmword ptr [rsp+190H]
+ movdqa xmm9, xmmword ptr [rsp+1A0H]
+ movdqa xmm10, xmmword ptr [rsp+1B0H]
+ movdqa xmm11, xmmword ptr [rsp+1C0H]
+ movdqa xmm12, xmmword ptr [rsp+1D0H]
+ movdqa xmm13, xmmword ptr [rsp+1E0H]
+ movdqa xmm14, xmmword ptr [rsp+1F0H]
+ movdqa xmm15, xmmword ptr [rsp+200H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final3blocks:
+ test esi, 2H
+ je final1block
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+110H]
+ pinsrd xmm13, dword ptr [rsp+120H], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+114H]
+ pinsrd xmm14, dword ptr [rsp+124H], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmmword ptr [rsp+10H], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 93H
+ movups xmm12, xmmword ptr [r9+rdx-40H]
+ movups xmm13, xmmword ptr [r9+rdx-30H]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-20H]
+ movups xmm15, xmmword ptr [r9+rdx-10H]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 93H
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 93H
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+10H]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+roundloop2:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+20H], xmm4
+ movaps xmmword ptr [rsp+30H], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+40H], xmm5
+ movaps xmmword ptr [rsp+50H], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm8, xmm8, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 39H
+ pshufd xmm10, xmm10, 39H
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm8, xmm8, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 93H
+ pshufd xmm10, xmm10, 93H
+ dec al
+ je endroundloop2
+ movdqa xmm12, xmmword ptr [rsp+20H]
+ movdqa xmm5, xmmword ptr [rsp+40H]
+ pshufd xmm13, xmm12, 0FH
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 39H
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0CCH
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0C0H
+ pshufd xmm12, xmm12, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmmword ptr [rsp+20H], xmm13
+ movdqa xmmword ptr [rsp+40H], xmm12
+ movdqa xmm5, xmmword ptr [rsp+30H]
+ movdqa xmm13, xmmword ptr [rsp+50H]
+ pshufd xmm6, xmm5, 0FH
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 39H
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0CCH
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0C0H
+ pshufd xmm5, xmm5, 78H
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 1EH
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+20H]
+ movdqa xmm6, xmmword ptr [rsp+40H]
+ jmp roundloop2
+endroundloop2:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ movups xmmword ptr [rbx+20H], xmm8
+ movups xmmword ptr [rbx+30H], xmm9
+ movdqa xmm0, xmmword ptr [rsp+130H]
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm2, xmmword ptr [rsp+120H]
+ movdqu xmm3, xmmword ptr [rsp+118H]
+ movdqu xmm4, xmmword ptr [rsp+128H]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+110H], xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+final1block:
+ test esi, 1H
+ je unwind
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movd xmm13, dword ptr [rsp+110H]
+ pinsrd xmm13, dword ptr [rsp+120H], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ mov al, 7
+roundloop1:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+_blake3_hash_many_sse41 ENDP
+blake3_hash_many_sse41 ENDP
+
+blake3_compress_in_place_sse41 PROC
+_blake3_compress_in_place_sse41 PROC
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movzx eax, byte ptr [rsp+0A0H]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ movd xmm3, r9
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+10H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+20H]
+ movups xmm7, xmmword ptr [rdx+30H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov al, 7
+@@:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp @B
+@@:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rcx], xmm0
+ movups xmmword ptr [rcx+10H], xmm1
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120
+ ret
+_blake3_compress_in_place_sse41 ENDP
+blake3_compress_in_place_sse41 ENDP
+
+ALIGN 16
+blake3_compress_xof_sse41 PROC
+_blake3_compress_xof_sse41 PROC
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movzx eax, byte ptr [rsp+0A0H]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+0A8H]
+ shl rax, 32
+ add r8, rax
+ movd xmm3, r9
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+10H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+20H]
+ movups xmm7, xmmword ptr [rdx+30H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov al, 7
+@@:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp @B
+@@:
+ movdqu xmm4, xmmword ptr [rcx]
+ movdqu xmm5, xmmword ptr [rcx+10H]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r10], xmm0
+ movups xmmword ptr [r10+10H], xmm1
+ movups xmmword ptr [r10+20H], xmm2
+ movups xmmword ptr [r10+30H], xmm3
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120
+ ret
+_blake3_compress_xof_sse41 ENDP
+blake3_compress_xof_sse41 ENDP
+
+_TEXT ENDS
+
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+BLAKE3_IV:
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+ADD0:
+ dd 0, 1, 2, 3
+
+ADD1:
+ dd 4 dup (4)
+
+BLAKE3_IV_0:
+ dd 4 dup (6A09E667H)
+
+BLAKE3_IV_1:
+ dd 4 dup (0BB67AE85H)
+
+BLAKE3_IV_2:
+ dd 4 dup (3C6EF372H)
+
+BLAKE3_IV_3:
+ dd 4 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN:
+ dd 4 dup (64)
+
+ROT16:
+ db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+
+ROT8:
+ db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+
+CMP_MSB_MASK:
+ dd 8 dup(80000000H)
+
+_RDATA ENDS
+END
+
diff --git a/src/third_party/doctest.h b/src/third_party/doctest.h
index acbe6cd..7712dd6 100644
--- a/src/third_party/doctest.h
+++ b/src/third_party/doctest.h
@@ -48,8 +48,8 @@
#define DOCTEST_VERSION_MAJOR 2
#define DOCTEST_VERSION_MINOR 4
-#define DOCTEST_VERSION_PATCH 1
-#define DOCTEST_VERSION_STR "2.4.1"
+#define DOCTEST_VERSION_PATCH 4
+#define DOCTEST_VERSION_STR "2.4.4"
#define DOCTEST_VERSION \
(DOCTEST_VERSION_MAJOR * 10000 + DOCTEST_VERSION_MINOR * 100 + DOCTEST_VERSION_PATCH)
@@ -368,7 +368,7 @@ DOCTEST_MSVC_SUPPRESS_WARNING(26812) // Prefer 'enum class' over 'enum'
#define DOCTEST_BREAK_INTO_DEBUGGER() raise(SIGTRAP)
#endif
#elif defined(DOCTEST_PLATFORM_MAC)
-#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__)
+#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__i386)
#define DOCTEST_BREAK_INTO_DEBUGGER() __asm__("int $3\n" : :)
#else
#define DOCTEST_BREAK_INTO_DEBUGGER() __asm__("brk #0");
@@ -747,6 +747,7 @@ struct ContextOptions //!OCLINT too many fields
bool gnu_file_line; // if line numbers should be surrounded with :x: and not (x):
bool no_path_in_filenames; // if the path to files should be removed from the output
bool no_line_numbers; // if source code line numbers should be omitted from the output
+ bool no_debug_output; // no output in the debug console when a debugger is attached
bool no_skipped_summary; // don't print "skipped" in the summary !!! UNDOCUMENTED !!!
bool no_time_in_output; // omit any time/timestamps from output !!! UNDOCUMENTED !!!
@@ -806,7 +807,7 @@ namespace detail {
} // namespace has_insertion_operator_impl
template<class T>
- using has_insertion_operator = has_insertion_operator_impl::check<T>;
+ using has_insertion_operator = has_insertion_operator_impl::check<const T>;
DOCTEST_INTERFACE void my_memcpy(void* dest, const void* src, unsigned num);
@@ -1035,6 +1036,7 @@ namespace detail {
template <typename L, typename R>
String stringifyBinaryExpr(const DOCTEST_REF_WRAP(L) lhs, const char* op,
const DOCTEST_REF_WRAP(R) rhs) {
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
return toString(lhs) + op + toString(rhs);
}
@@ -1122,6 +1124,7 @@ namespace detail {
#define DOCTEST_COMPARISON_RETURN_TYPE bool
#else // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING
#define DOCTEST_COMPARISON_RETURN_TYPE typename enable_if<can_use_op<L>::value || can_use_op<R>::value, bool>::type
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
inline bool eq(const char* lhs, const char* rhs) { return String(lhs) == String(rhs); }
inline bool ne(const char* lhs, const char* rhs) { return String(lhs) != String(rhs); }
inline bool lt(const char* lhs, const char* rhs) { return String(lhs) < String(rhs); }
@@ -1541,12 +1544,24 @@ namespace detail {
MessageBuilder() = delete;
~MessageBuilder();
+ // the preferred way of chaining parameters for stringification
template <typename T>
- MessageBuilder& operator<<(const T& in) {
+ MessageBuilder& operator,(const T& in) {
toStream(m_stream, in);
return *this;
}
+ // kept here just for backwards-compatibility - the comma operator should be preferred now
+ template <typename T>
+ MessageBuilder& operator<<(const T& in) { return this->operator,(in); }
+
+ // the `,` operator has the lowest operator precedence - if `<<` is used by the user then
+ // the `,` operator will be called last which is not what we want and thus the `*` operator
+ // is used first (has higher operator precedence compared to `<<`) so that we guarantee that
+ // an operator of the MessageBuilder class is called first before the rest of the parameters
+ template <typename T>
+ MessageBuilder& operator*(const T& in) { return this->operator,(in); }
+
bool log();
void react();
};
@@ -1962,38 +1977,38 @@ int registerReporter(const char* name, int priority, bool isReporter) {
DOCTEST_GLOBAL_NO_WARNINGS_END() typedef int DOCTEST_ANONYMOUS(_DOCTEST_ANON_FOR_SEMICOLON_)
// for logging
-#define DOCTEST_INFO(expression) \
+#define DOCTEST_INFO(...) \
DOCTEST_INFO_IMPL(DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), \
- DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), expression)
+ DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), __VA_ARGS__)
-#define DOCTEST_INFO_IMPL(lambda_name, mb_name, s_name, expression) \
+#define DOCTEST_INFO_IMPL(lambda_name, mb_name, s_name, ...) \
DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4626) \
auto lambda_name = [&](std::ostream* s_name) { \
doctest::detail::MessageBuilder mb_name(__FILE__, __LINE__, doctest::assertType::is_warn); \
mb_name.m_stream = s_name; \
- mb_name << expression; \
+ mb_name * __VA_ARGS__; \
}; \
DOCTEST_MSVC_SUPPRESS_WARNING_POP \
auto DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_) = doctest::detail::MakeContextScope(lambda_name)
-#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := " << x)
+#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := ", x)
-#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, x) \
+#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, ...) \
do { \
doctest::detail::MessageBuilder mb(file, line, doctest::assertType::type); \
- mb << x; \
+ mb * __VA_ARGS__; \
DOCTEST_ASSERT_LOG_AND_REACT(mb); \
} while(false)
// clang-format off
-#define DOCTEST_ADD_MESSAGE_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_warn, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x)
-#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_check, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x)
-#define DOCTEST_ADD_FAIL_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_require, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x)
+#define DOCTEST_ADD_MESSAGE_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_warn, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__)
+#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_check, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__)
+#define DOCTEST_ADD_FAIL_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_require, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__)
// clang-format on
-#define DOCTEST_MESSAGE(x) DOCTEST_ADD_MESSAGE_AT(__FILE__, __LINE__, x)
-#define DOCTEST_FAIL_CHECK(x) DOCTEST_ADD_FAIL_CHECK_AT(__FILE__, __LINE__, x)
-#define DOCTEST_FAIL(x) DOCTEST_ADD_FAIL_AT(__FILE__, __LINE__, x)
+#define DOCTEST_MESSAGE(...) DOCTEST_ADD_MESSAGE_AT(__FILE__, __LINE__, __VA_ARGS__)
+#define DOCTEST_FAIL_CHECK(...) DOCTEST_ADD_FAIL_CHECK_AT(__FILE__, __LINE__, __VA_ARGS__)
+#define DOCTEST_FAIL(...) DOCTEST_ADD_FAIL_AT(__FILE__, __LINE__, __VA_ARGS__)
#define DOCTEST_TO_LVALUE(...) __VA_ARGS__ // Not removed to keep backwards compatibility.
@@ -2036,12 +2051,12 @@ int registerReporter(const char* name, int priority, bool isReporter) {
#define DOCTEST_REQUIRE_FALSE(...) DOCTEST_ASSERT_IMPLEMENT_1(DT_REQUIRE_FALSE, __VA_ARGS__)
// clang-format off
-#define DOCTEST_WARN_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN, cond); } while(false)
-#define DOCTEST_CHECK_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK, cond); } while(false)
-#define DOCTEST_REQUIRE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE, cond); } while(false)
-#define DOCTEST_WARN_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN_FALSE, cond); } while(false)
-#define DOCTEST_CHECK_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK_FALSE, cond); } while(false)
-#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE_FALSE, cond); } while(false)
+#define DOCTEST_WARN_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN, cond); } while(false)
+#define DOCTEST_CHECK_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK, cond); } while(false)
+#define DOCTEST_REQUIRE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE, cond); } while(false)
+#define DOCTEST_WARN_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN_FALSE, cond); } while(false)
+#define DOCTEST_CHECK_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK_FALSE, cond); } while(false)
+#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE_FALSE, cond); } while(false)
// clang-format on
#define DOCTEST_ASSERT_THROWS_AS(expr, assert_type, message, ...) \
@@ -2051,8 +2066,8 @@ int registerReporter(const char* name, int priority, bool isReporter) {
__LINE__, #expr, #__VA_ARGS__, message); \
try { \
DOCTEST_CAST_TO_VOID(expr) \
- } catch(const doctest::detail::remove_const< \
- doctest::detail::remove_reference<__VA_ARGS__>::type>::type&) { \
+ } catch(const typename doctest::detail::remove_const< \
+ typename doctest::detail::remove_reference<__VA_ARGS__>::type>::type&) { \
_DOCTEST_RB.translateException(); \
_DOCTEST_RB.m_threw_as = true; \
} catch(...) { _DOCTEST_RB.translateException(); } \
@@ -2103,21 +2118,21 @@ int registerReporter(const char* name, int priority, bool isReporter) {
#define DOCTEST_CHECK_NOTHROW(...) DOCTEST_ASSERT_NOTHROW(DT_CHECK_NOTHROW, __VA_ARGS__)
#define DOCTEST_REQUIRE_NOTHROW(...) DOCTEST_ASSERT_NOTHROW(DT_REQUIRE_NOTHROW, __VA_ARGS__)
-#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS(expr); } while(false)
-#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS(expr); } while(false)
-#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS(expr); } while(false)
-#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_AS(expr, ex); } while(false)
-#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_AS(expr, ex); } while(false)
-#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_AS(expr, ex); } while(false)
-#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_WITH(expr, with); } while(false)
-#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_WITH(expr, with); } while(false)
-#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_WITH(expr, with); } while(false)
-#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_WITH_AS(expr, with, ex); } while(false)
-#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_WITH_AS(expr, with, ex); } while(false)
-#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_WITH_AS(expr, with, ex); } while(false)
-#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_NOTHROW(expr); } while(false)
-#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_NOTHROW(expr); } while(false)
-#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_NOTHROW(expr); } while(false)
+#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS(expr); } while(false)
+#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS(expr); } while(false)
+#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS(expr); } while(false)
+#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_AS(expr, ex); } while(false)
+#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_AS(expr, ex); } while(false)
+#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_AS(expr, ex); } while(false)
+#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_WITH(expr, with); } while(false)
+#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_WITH(expr, with); } while(false)
+#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_WITH(expr, with); } while(false)
+#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_WITH_AS(expr, with, ex); } while(false)
+#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_WITH_AS(expr, with, ex); } while(false)
+#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_WITH_AS(expr, with, ex); } while(false)
+#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_NOTHROW(expr); } while(false)
+#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_NOTHROW(expr); } while(false)
+#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_NOTHROW(expr); } while(false)
// clang-format on
#ifndef DOCTEST_CONFIG_SUPER_FAST_ASSERTS
@@ -2230,21 +2245,21 @@ int registerReporter(const char* name, int priority, bool isReporter) {
#define DOCTEST_CHECK_NOTHROW(...) (static_cast<void>(0))
#define DOCTEST_REQUIRE_NOTHROW(...) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
#else // DOCTEST_CONFIG_NO_EXCEPTIONS_BUT_WITH_ALL_ASSERTS
@@ -2335,14 +2350,14 @@ int registerReporter(const char* name, int priority, bool isReporter) {
#define DOCTEST_REGISTER_REPORTER(name, priority, reporter)
#define DOCTEST_REGISTER_LISTENER(name, priority, reporter)
-#define DOCTEST_INFO(x) (static_cast<void>(0))
+#define DOCTEST_INFO(...) (static_cast<void>(0))
#define DOCTEST_CAPTURE(x) (static_cast<void>(0))
-#define DOCTEST_ADD_MESSAGE_AT(file, line, x) (static_cast<void>(0))
-#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, x) (static_cast<void>(0))
-#define DOCTEST_ADD_FAIL_AT(file, line, x) (static_cast<void>(0))
-#define DOCTEST_MESSAGE(x) (static_cast<void>(0))
-#define DOCTEST_FAIL_CHECK(x) (static_cast<void>(0))
-#define DOCTEST_FAIL(x) (static_cast<void>(0))
+#define DOCTEST_ADD_MESSAGE_AT(file, line, ...) (static_cast<void>(0))
+#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, ...) (static_cast<void>(0))
+#define DOCTEST_ADD_FAIL_AT(file, line, ...) (static_cast<void>(0))
+#define DOCTEST_MESSAGE(...) (static_cast<void>(0))
+#define DOCTEST_FAIL_CHECK(...) (static_cast<void>(0))
+#define DOCTEST_FAIL(...) (static_cast<void>(0))
#define DOCTEST_WARN(...) (static_cast<void>(0))
#define DOCTEST_CHECK(...) (static_cast<void>(0))
@@ -2351,12 +2366,12 @@ int registerReporter(const char* name, int priority, bool isReporter) {
#define DOCTEST_CHECK_FALSE(...) (static_cast<void>(0))
#define DOCTEST_REQUIRE_FALSE(...) (static_cast<void>(0))
-#define DOCTEST_WARN_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_FALSE_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_FALSE_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, msg) (static_cast<void>(0))
+#define DOCTEST_WARN_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_FALSE_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_FALSE_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, ...) (static_cast<void>(0))
#define DOCTEST_WARN_THROWS(...) (static_cast<void>(0))
#define DOCTEST_CHECK_THROWS(...) (static_cast<void>(0))
@@ -2374,21 +2389,21 @@ int registerReporter(const char* name, int priority, bool isReporter) {
#define DOCTEST_CHECK_NOTHROW(...) (static_cast<void>(0))
#define DOCTEST_REQUIRE_NOTHROW(...) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
#define DOCTEST_WARN_EQ(...) (static_cast<void>(0))
#define DOCTEST_CHECK_EQ(...) (static_cast<void>(0))
@@ -2754,9 +2769,7 @@ DOCTEST_MAKE_STD_HEADERS_CLEAN_FROM_WARNINGS_ON_WALL_BEGIN
#include <map>
#include <exception>
#include <stdexcept>
-#ifdef DOCTEST_CONFIG_POSIX_SIGNALS
#include <csignal>
-#endif // DOCTEST_CONFIG_POSIX_SIGNALS
#include <cfloat>
#include <cctype>
#include <cstdint>
@@ -3071,6 +3084,7 @@ String::String() {
String::~String() {
if(!isOnStack())
delete[] data.ptr;
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
}
String::String(const char* in)
@@ -3112,6 +3126,7 @@ String& String::operator+=(const String& other) {
if(total_size < len) {
// append to the current stack space
memcpy(buf + my_old_size, other.c_str(), other_size + 1);
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
setLast(last - total_size);
} else {
// alloc new chunk
@@ -3153,6 +3168,7 @@ String& String::operator+=(const String& other) {
return *this;
}
+// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
String String::operator+(const String& other) const { return String(*this) += other; }
String::String(String&& other) {
@@ -3307,6 +3323,7 @@ DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wnull-dereference")
DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wnull-dereference")
// depending on the current options this will remove the path of filenames
const char* skipPathFromFilename(const char* file) {
+#ifndef DOCTEST_CONFIG_DISABLE
if(getContextOptions()->no_path_in_filenames) {
auto back = std::strrchr(file, '\\');
auto forward = std::strrchr(file, '/');
@@ -3316,6 +3333,7 @@ const char* skipPathFromFilename(const char* file) {
return forward + 1;
}
}
+#endif // DOCTEST_CONFIG_DISABLE
return file;
}
DOCTEST_CLANG_SUPPRESS_WARNING_POP
@@ -3334,6 +3352,7 @@ IContextScope::~IContextScope() = default;
#ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING
String toString(char* in) { return toString(static_cast<const char*>(in)); }
+// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
String toString(const char* in) { return String("\"") + (in ? in : "{null string}") + "\""; }
#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING
String toString(bool in) { return in ? "true" : "false"; }
@@ -3406,6 +3425,7 @@ bool operator>(double lhs, const Approx& rhs) { return lhs > rhs.m_value && lhs
bool operator>(const Approx& lhs, double rhs) { return lhs.m_value > rhs && lhs != rhs; }
String toString(const Approx& in) {
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
return String("Approx( ") + doctest::toString(in.m_value) + " )";
}
const ContextOptions* getContextOptions() { return DOCTEST_BRANCH_ON_DISABLED(nullptr, g_cs); }
@@ -3698,11 +3718,15 @@ namespace detail {
}
bool TestCase::operator<(const TestCase& other) const {
+ // this will be used only to differentiate between test cases - not relevant for sorting
if(m_line != other.m_line)
return m_line < other.m_line;
const int file_cmp = m_file.compare(other.m_file);
if(file_cmp != 0)
return file_cmp < 0;
+ const int name_cmp = strcmp(m_name, other.m_name);
+ if(name_cmp != 0)
+ return name_cmp < 0;
return m_template_id < other.m_template_id;
}
} // namespace detail
@@ -4009,24 +4033,40 @@ namespace {
// Windows can easily distinguish between SO and SigSegV,
// but SigInt, SigTerm, etc are handled differently.
SignalDefs signalDefs[] = {
- {EXCEPTION_ILLEGAL_INSTRUCTION, "SIGILL - Illegal instruction signal"},
- {EXCEPTION_STACK_OVERFLOW, "SIGSEGV - Stack overflow"},
- {EXCEPTION_ACCESS_VIOLATION, "SIGSEGV - Segmentation violation signal"},
- {EXCEPTION_INT_DIVIDE_BY_ZERO, "Divide by zero error"},
+ {static_cast<DWORD>(EXCEPTION_ILLEGAL_INSTRUCTION),
+ "SIGILL - Illegal instruction signal"},
+ {static_cast<DWORD>(EXCEPTION_STACK_OVERFLOW), "SIGSEGV - Stack overflow"},
+ {static_cast<DWORD>(EXCEPTION_ACCESS_VIOLATION),
+ "SIGSEGV - Segmentation violation signal"},
+ {static_cast<DWORD>(EXCEPTION_INT_DIVIDE_BY_ZERO), "Divide by zero error"},
};
struct FatalConditionHandler
{
static LONG CALLBACK handleException(PEXCEPTION_POINTERS ExceptionInfo) {
- for(size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) {
- if(ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) {
- reportFatal(signalDefs[i].name);
- break;
+ // Multiple threads may enter this filter/handler at once. We want the error message to be printed on the
+ // console just once no matter how many threads have crashed.
+ static std::mutex mutex;
+ static bool execute = true;
+ {
+ std::lock_guard<std::mutex> lock(mutex);
+ if(execute) {
+ bool reported = false;
+ for(size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) {
+ if(ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) {
+ reportFatal(signalDefs[i].name);
+ reported = true;
+ break;
+ }
+ }
+ if(reported == false)
+ reportFatal("Unhandled SEH exception caught");
+ if(isDebuggerActive() && !g_cs->no_breaks)
+ DOCTEST_BREAK_INTO_DEBUGGER();
}
+ execute = false;
}
- // If its not an exception we care about, pass it along.
- // This stops us from eating debugger breaks etc.
- return EXCEPTION_CONTINUE_SEARCH;
+ std::exit(EXIT_FAILURE);
}
FatalConditionHandler() {
@@ -4038,6 +4078,51 @@ namespace {
previousTop = SetUnhandledExceptionFilter(handleException);
// Pass in guarantee size to be filled
SetThreadStackGuarantee(&guaranteeSize);
+
+ // On Windows uncaught exceptions from another thread, exceptions from
+ // destructors, or calls to std::terminate are not a SEH exception
+
+ // The terminal handler gets called when:
+ // - std::terminate is called FROM THE TEST RUNNER THREAD
+ // - an exception is thrown from a destructor FROM THE TEST RUNNER THREAD
+ original_terminate_handler = std::get_terminate();
+ std::set_terminate([]() noexcept {
+ reportFatal("Terminate handler called");
+ if(isDebuggerActive() && !g_cs->no_breaks)
+ DOCTEST_BREAK_INTO_DEBUGGER();
+ std::exit(EXIT_FAILURE); // explicitly exit - otherwise the SIGABRT handler may be called as well
+ });
+
+ // SIGABRT is raised when:
+ // - std::terminate is called FROM A DIFFERENT THREAD
+ // - an exception is thrown from a destructor FROM A DIFFERENT THREAD
+ // - an uncaught exception is thrown FROM A DIFFERENT THREAD
+ prev_sigabrt_handler = std::signal(SIGABRT, [](int signal) noexcept {
+ if(signal == SIGABRT) {
+ reportFatal("SIGABRT - Abort (abnormal termination) signal");
+ if(isDebuggerActive() && !g_cs->no_breaks)
+ DOCTEST_BREAK_INTO_DEBUGGER();
+ std::exit(EXIT_FAILURE);
+ }
+ });
+
+ // The following settings are taken from google test, and more
+ // specifically from UnitTest::Run() inside of gtest.cc
+
+ // the user does not want to see pop-up dialogs about crashes
+ prev_error_mode_1 = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
+ SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
+ // This forces the abort message to go to stderr in all circumstances.
+ prev_error_mode_2 = _set_error_mode(_OUT_TO_STDERR);
+ // In the debug version, Visual Studio pops up a separate dialog
+ // offering a choice to debug the aborted program - we want to disable that.
+ prev_abort_behavior = _set_abort_behavior(0x0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT);
+ // In debug mode, the Windows CRT can crash with an assertion over invalid
+ // input (e.g. passing an invalid file descriptor). The default handling
+ // for these assertions is to pop up a dialog and wait for user input.
+ // Instead ask the CRT to dump such assertions to stderr non-interactively.
+ prev_report_mode = _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+ prev_report_file = _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
}
static void reset() {
@@ -4045,7 +4130,13 @@ namespace {
// Unregister handler and restore the old guarantee
SetUnhandledExceptionFilter(previousTop);
SetThreadStackGuarantee(&guaranteeSize);
- previousTop = nullptr;
+ std::set_terminate(original_terminate_handler);
+ std::signal(SIGABRT, prev_sigabrt_handler);
+ SetErrorMode(prev_error_mode_1);
+ _set_error_mode(prev_error_mode_2);
+ _set_abort_behavior(prev_abort_behavior, _WRITE_ABORT_MSG | _CALL_REPORTFAULT);
+ _CrtSetReportMode(_CRT_ASSERT, prev_report_mode);
+ _CrtSetReportFile(_CRT_ASSERT, prev_report_file);
isSet = false;
}
}
@@ -4053,11 +4144,25 @@ namespace {
~FatalConditionHandler() { reset(); }
private:
+ static UINT prev_error_mode_1;
+ static int prev_error_mode_2;
+ static unsigned int prev_abort_behavior;
+ static int prev_report_mode;
+ static _HFILE prev_report_file;
+ static void (*prev_sigabrt_handler)(int);
+ static std::terminate_handler original_terminate_handler;
static bool isSet;
static ULONG guaranteeSize;
static LPTOP_LEVEL_EXCEPTION_FILTER previousTop;
};
+ UINT FatalConditionHandler::prev_error_mode_1;
+ int FatalConditionHandler::prev_error_mode_2;
+ unsigned int FatalConditionHandler::prev_abort_behavior;
+ int FatalConditionHandler::prev_report_mode;
+ _HFILE FatalConditionHandler::prev_report_file;
+ void (*FatalConditionHandler::prev_sigabrt_handler)(int);
+ std::terminate_handler FatalConditionHandler::original_terminate_handler;
bool FatalConditionHandler::isSet = false;
ULONG FatalConditionHandler::guaranteeSize = 0;
LPTOP_LEVEL_EXCEPTION_FILTER FatalConditionHandler::previousTop = nullptr;
@@ -4257,6 +4362,7 @@ namespace detail {
// ###################################################################################
DOCTEST_ASSERT_OUT_OF_TESTS(result.m_decomp);
DOCTEST_ASSERT_IN_TESTS(result.m_decomp);
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
}
MessageBuilder::MessageBuilder(const char* file, int line, assertType::Enum severity) {
@@ -4979,7 +5085,6 @@ namespace {
}
// TODO:
- // - log_contexts()
// - log_message()
// - respond to queries
// - honor remaining options
@@ -4993,7 +5098,6 @@ namespace {
struct JUnitTestCaseData
{
-DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") // gmtime
static std::string getCurrentTimestamp() {
// Beware, this is not reentrant because of backward compatibility issues
// Also, UTC only, again because of backward compatibility (%z is C++11)
@@ -5001,16 +5105,19 @@ DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") // gmtime
std::time(&rawtime);
auto const timeStampSize = sizeof("2017-01-16T17:06:45Z");
- std::tm* timeInfo;
- timeInfo = std::gmtime(&rawtime);
+ std::tm timeInfo;
+#ifdef DOCTEST_PLATFORM_WINDOWS
+ gmtime_s(&timeInfo, &rawtime);
+#else // DOCTEST_PLATFORM_WINDOWS
+ gmtime_r(&rawtime, &timeInfo);
+#endif // DOCTEST_PLATFORM_WINDOWS
char timeStamp[timeStampSize];
const char* const fmt = "%Y-%m-%dT%H:%M:%SZ";
- std::strftime(timeStamp, timeStampSize, fmt, timeInfo);
+ std::strftime(timeStamp, timeStampSize, fmt, &timeInfo);
return std::string(timeStamp);
}
-DOCTEST_CLANG_SUPPRESS_WARNING_POP
struct JUnitTestMessage
{
@@ -5175,12 +5282,27 @@ DOCTEST_CLANG_SUPPRESS_WARNING_POP
<< line(rb.m_line) << (opt.gnu_file_line ? ":" : "):") << std::endl;
fulltext_log_assert_to_stream(os, rb);
+ log_contexts(os);
testCaseData.addFailure(rb.m_decomp.c_str(), assertString(rb.m_at), os.str());
}
void log_message(const MessageData&) override {}
void test_case_skipped(const TestCaseData&) override {}
+
+ void log_contexts(std::ostringstream& s) {
+ int num_contexts = get_num_active_contexts();
+ if(num_contexts) {
+ auto contexts = get_active_contexts();
+
+ s << " logged: ";
+ for(int i = 0; i < num_contexts; ++i) {
+ s << (i == 0 ? "" : " ");
+ contexts[i]->stringify(&s);
+ s << std::endl;
+ }
+ }
+ }
};
DOCTEST_REGISTER_REPORTER("junit", 0, JUnitReporter);
@@ -5894,6 +6016,7 @@ void Context::parseArgs(int argc, const char* const* argv, bool withDefaults) {
DOCTEST_PARSE_AS_BOOL_OR_FLAG("gnu-file-line", "gfl", gnu_file_line, !bool(DOCTEST_MSVC));
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-path-filenames", "npf", no_path_in_filenames, false);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-line-numbers", "nln", no_line_numbers, false);
+ DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-debug-output", "ndo", no_debug_output, false);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-skipped-summary", "nss", no_skipped_summary, false);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-time-in-output", "ntio", no_time_in_output, false);
// clang-format on
@@ -5951,6 +6074,7 @@ void Context::clearFilters() {
// allows the user to override procedurally the int/bool options from the command line
void Context::setOption(const char* option, int value) {
setOption(option, toString(value).c_str());
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
}
// allows the user to override procedurally the string options from the command line
@@ -6026,7 +6150,7 @@ int Context::run() {
p->reporters_currently_used.insert(p->reporters_currently_used.begin(), curr.second(*g_cs));
#ifdef DOCTEST_PLATFORM_WINDOWS
- if(isDebuggerActive())
+ if(isDebuggerActive() && p->no_debug_output == false)
p->reporters_currently_used.push_back(new DebugOutputWindowReporter(*g_cs));
#endif // DOCTEST_PLATFORM_WINDOWS
diff --git a/src/third_party/fmt/core.h b/src/third_party/fmt/core.h
index 031bf86..0a81e0c 100644
--- a/src/third_party/fmt/core.h
+++ b/src/third_party/fmt/core.h
@@ -18,7 +18,7 @@
#include <vector>
// The fmt library version in the form major * 10000 + minor * 100 + patch.
-#define FMT_VERSION 70003
+#define FMT_VERSION 70103
#ifdef __clang__
# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
@@ -57,6 +57,7 @@
# define FMT_MSC_VER 0
# define FMT_SUPPRESS_MSC_WARNING(n)
#endif
+
#ifdef __has_feature
# define FMT_HAS_FEATURE(x) __has_feature(x)
#else
@@ -64,7 +65,7 @@
#endif
#if defined(__has_include) && !defined(__INTELLISENSE__) && \
- !(FMT_ICC_VERSION && FMT_ICC_VERSION < 1600)
+ (!FMT_ICC_VERSION || FMT_ICC_VERSION >= 1600)
# define FMT_HAS_INCLUDE(x) __has_include(x)
#else
# define FMT_HAS_INCLUDE(x) 0
@@ -152,7 +153,7 @@
# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900
# define FMT_DEPRECATED [[deprecated]]
# else
-# if defined(__GNUC__) || defined(__clang__)
+# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__)
# define FMT_DEPRECATED __attribute__((deprecated))
# elif FMT_MSC_VER
# define FMT_DEPRECATED __declspec(deprecated)
@@ -177,9 +178,17 @@
# endif
#endif
-#ifndef FMT_BEGIN_NAMESPACE
+#ifndef FMT_USE_INLINE_NAMESPACES
# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \
- FMT_MSC_VER >= 1900
+ (FMT_MSC_VER >= 1900 && !_MANAGED)
+# define FMT_USE_INLINE_NAMESPACES 1
+# else
+# define FMT_USE_INLINE_NAMESPACES 0
+# endif
+#endif
+
+#ifndef FMT_BEGIN_NAMESPACE
+# if FMT_USE_INLINE_NAMESPACES
# define FMT_INLINE_NAMESPACE inline namespace
# define FMT_END_NAMESPACE \
} \
@@ -269,8 +278,7 @@ struct monostate {};
namespace detail {
-// A helper function to suppress bogus "conditional expression is constant"
-// warnings.
+// A helper function to suppress "conditional expression is constant" warnings.
template <typename T> constexpr T const_check(T value) { return value; }
FMT_NORETURN FMT_API void assert_fail(const char* file, int line,
@@ -299,7 +307,8 @@ template <typename T> struct std_string_view {};
#ifdef FMT_USE_INT128
// Do nothing.
-#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && !(FMT_CLANG_VERSION && FMT_MSC_VER)
+#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && \
+ !(FMT_CLANG_VERSION && FMT_MSC_VER)
# define FMT_USE_INT128 1
using int128_t = __int128_t;
using uint128_t = __uint128_t;
@@ -506,6 +515,18 @@ template <typename S> struct char_t_impl<S, enable_if_t<is_string<S>::value>> {
using type = typename result::value_type;
};
+// Reports a compile-time error if S is not a valid format string.
+template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)>
+FMT_INLINE void check_format_string(const S&) {
+#ifdef FMT_ENFORCE_COMPILE_STRING
+ static_assert(is_compile_string<S>::value,
+ "FMT_ENFORCE_COMPILE_STRING requires all format strings to use "
+ "FMT_STRING.");
+#endif
+}
+template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
+void check_format_string(S);
+
struct error_handler {
constexpr error_handler() = default;
constexpr error_handler(const error_handler&) = default;
@@ -545,8 +566,9 @@ class basic_format_parse_context : private ErrorHandler {
using iterator = typename basic_string_view<Char>::iterator;
explicit constexpr basic_format_parse_context(
- basic_string_view<Char> format_str, ErrorHandler eh = {})
- : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {}
+ basic_string_view<Char> format_str, ErrorHandler eh = {},
+ int next_arg_id = 0)
+ : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {}
/**
Returns an iterator to the beginning of the format string range being
@@ -616,8 +638,24 @@ template <typename T, typename Context>
using has_formatter =
std::is_constructible<typename Context::template formatter_type<T>>;
+// Checks whether T is a container with contiguous storage.
+template <typename T> struct is_contiguous : std::false_type {};
+template <typename Char>
+struct is_contiguous<std::basic_string<Char>> : std::true_type {};
+
namespace detail {
+// Extracts a reference to the container from back_insert_iterator.
+template <typename Container>
+inline Container& get_container(std::back_insert_iterator<Container> it) {
+ using bi_iterator = std::back_insert_iterator<Container>;
+ struct accessor : bi_iterator {
+ accessor(bi_iterator iter) : bi_iterator(iter) {}
+ using bi_iterator::container;
+ };
+ return *accessor(it).container;
+}
+
/**
\rst
A contiguous memory buffer with an optional growing ability. It is an internal
@@ -640,6 +678,8 @@ template <typename T> class buffer {
size_(sz),
capacity_(cap) {}
+ ~buffer() = default;
+
/** Sets the buffer data and capacity. */
void set(T* buf_data, size_t buf_capacity) FMT_NOEXCEPT {
ptr_ = buf_data;
@@ -655,7 +695,6 @@ template <typename T> class buffer {
buffer(const buffer&) = delete;
void operator=(const buffer&) = delete;
- virtual ~buffer() = default;
T* begin() FMT_NOEXCEPT { return ptr_; }
T* end() FMT_NOEXCEPT { return ptr_ + size_; }
@@ -675,24 +714,26 @@ template <typename T> class buffer {
/** Returns a pointer to the buffer data. */
const T* data() const FMT_NOEXCEPT { return ptr_; }
- /**
- Resizes the buffer. If T is a POD type new elements may not be initialized.
- */
- void resize(size_t new_size) {
- reserve(new_size);
- size_ = new_size;
- }
-
/** Clears this buffer. */
void clear() { size_ = 0; }
- /** Reserves space to store at least *capacity* elements. */
- void reserve(size_t new_capacity) {
+ // Tries resizing the buffer to contain *count* elements. If T is a POD type
+ // the new elements may not be initialized.
+ void try_resize(size_t count) {
+ try_reserve(count);
+ size_ = count <= capacity_ ? count : capacity_;
+ }
+
+ // Tries increasing the buffer capacity to *new_capacity*. It can increase the
+ // capacity by a smaller amount than requested but guarantees there is space
+ // for at least one additional element either by increasing the capacity or by
+ // flushing the buffer if it is full.
+ void try_reserve(size_t new_capacity) {
if (new_capacity > capacity_) grow(new_capacity);
}
void push_back(const T& value) {
- reserve(size_ + 1);
+ try_reserve(size_ + 1);
ptr_[size_++] = value;
}
@@ -705,32 +746,150 @@ template <typename T> class buffer {
}
};
-// A container-backed buffer.
+struct buffer_traits {
+ explicit buffer_traits(size_t) {}
+ size_t count() const { return 0; }
+ size_t limit(size_t size) { return size; }
+};
+
+class fixed_buffer_traits {
+ private:
+ size_t count_ = 0;
+ size_t limit_;
+
+ public:
+ explicit fixed_buffer_traits(size_t limit) : limit_(limit) {}
+ size_t count() const { return count_; }
+ size_t limit(size_t size) {
+ size_t n = limit_ > count_ ? limit_ - count_ : 0;
+ count_ += size;
+ return size < n ? size : n;
+ }
+};
+
+// A buffer that writes to an output iterator when flushed.
+template <typename OutputIt, typename T, typename Traits = buffer_traits>
+class iterator_buffer final : public Traits, public buffer<T> {
+ private:
+ OutputIt out_;
+ enum { buffer_size = 256 };
+ T data_[buffer_size];
+
+ protected:
+ void grow(size_t) final FMT_OVERRIDE {
+ if (this->size() == buffer_size) flush();
+ }
+ void flush();
+
+ public:
+ explicit iterator_buffer(OutputIt out, size_t n = buffer_size)
+ : Traits(n),
+ buffer<T>(data_, 0, buffer_size),
+ out_(out) {}
+ ~iterator_buffer() { flush(); }
+
+ OutputIt out() {
+ flush();
+ return out_;
+ }
+ size_t count() const { return Traits::count() + this->size(); }
+};
+
+template <typename T> class iterator_buffer<T*, T> final : public buffer<T> {
+ protected:
+ void grow(size_t) final FMT_OVERRIDE {}
+
+ public:
+ explicit iterator_buffer(T* out, size_t = 0) : buffer<T>(out, 0, ~size_t()) {}
+
+ T* out() { return &*this->end(); }
+};
+
+// A buffer that writes to a container with the contiguous storage.
template <typename Container>
-class container_buffer : public buffer<typename Container::value_type> {
+class iterator_buffer<std::back_insert_iterator<Container>,
+ enable_if_t<is_contiguous<Container>::value,
+ typename Container::value_type>>
+ final : public buffer<typename Container::value_type> {
private:
Container& container_;
protected:
- void grow(size_t capacity) FMT_OVERRIDE {
+ void grow(size_t capacity) final FMT_OVERRIDE {
container_.resize(capacity);
this->set(&container_[0], capacity);
}
public:
- explicit container_buffer(Container& c)
+ explicit iterator_buffer(Container& c)
: buffer<typename Container::value_type>(c.size()), container_(c) {}
+ explicit iterator_buffer(std::back_insert_iterator<Container> out, size_t = 0)
+ : iterator_buffer(get_container(out)) {}
+ std::back_insert_iterator<Container> out() {
+ return std::back_inserter(container_);
+ }
};
-// Extracts a reference to the container from back_insert_iterator.
-template <typename Container>
-inline Container& get_container(std::back_insert_iterator<Container> it) {
- using bi_iterator = std::back_insert_iterator<Container>;
- struct accessor : bi_iterator {
- accessor(bi_iterator iter) : bi_iterator(iter) {}
- using bi_iterator::container;
- };
- return *accessor(it).container;
+// A buffer that counts the number of code units written discarding the output.
+template <typename T = char> class counting_buffer final : public buffer<T> {
+ private:
+ enum { buffer_size = 256 };
+ T data_[buffer_size];
+ size_t count_ = 0;
+
+ protected:
+ void grow(size_t) final FMT_OVERRIDE {
+ if (this->size() != buffer_size) return;
+ count_ += this->size();
+ this->clear();
+ }
+
+ public:
+ counting_buffer() : buffer<T>(data_, 0, buffer_size) {}
+
+ size_t count() { return count_ + this->size(); }
+};
+
+// An output iterator that appends to the buffer.
+// It is used to reduce symbol sizes for the common case.
+template <typename T>
+class buffer_appender : public std::back_insert_iterator<buffer<T>> {
+ using base = std::back_insert_iterator<buffer<T>>;
+
+ public:
+ explicit buffer_appender(buffer<T>& buf) : base(buf) {}
+ buffer_appender(base it) : base(it) {}
+
+ buffer_appender& operator++() {
+ base::operator++();
+ return *this;
+ }
+
+ buffer_appender operator++(int) {
+ buffer_appender tmp = *this;
+ ++*this;
+ return tmp;
+ }
+};
+
+// Maps an output iterator into a buffer.
+template <typename T, typename OutputIt>
+iterator_buffer<OutputIt, T> get_buffer(OutputIt);
+template <typename T> buffer<T>& get_buffer(buffer_appender<T>);
+
+template <typename OutputIt> OutputIt get_buffer_init(OutputIt out) {
+ return out;
+}
+template <typename T> buffer<T>& get_buffer_init(buffer_appender<T> out) {
+ return get_container(out);
+}
+
+template <typename Buffer>
+auto get_iterator(Buffer& buf) -> decltype(buf.out()) {
+ return buf.out();
+}
+template <typename T> buffer_appender<T> get_iterator(buffer<T>& buf) {
+ return buffer_appender<T>(buf);
}
template <typename T, typename Char = char, typename Enable = void>
@@ -759,7 +918,8 @@ template <typename Char> struct named_arg_info {
template <typename T, typename Char, size_t NUM_ARGS, size_t NUM_NAMED_ARGS>
struct arg_data {
// args_[0].named_args points to named_args_ to avoid bloating format_args.
- T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : 1)];
+ // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning.
+ T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)];
named_arg_info<Char> named_args_[NUM_NAMED_ARGS];
template <typename... U>
@@ -771,7 +931,8 @@ struct arg_data {
template <typename T, typename Char, size_t NUM_ARGS>
struct arg_data<T, Char, NUM_ARGS, 0> {
- T args_[NUM_ARGS != 0 ? NUM_ARGS : 1];
+ // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning.
+ T args_[NUM_ARGS != 0 ? NUM_ARGS : +1];
template <typename... U>
FMT_INLINE arg_data(const U&... init) : args_{init...} {}
@@ -959,6 +1120,8 @@ enum { long_short = sizeof(long) == sizeof(int) };
using long_type = conditional_t<long_short, int, long long>;
using ulong_type = conditional_t<long_short, unsigned, unsigned long long>;
+struct unformattable {};
+
// Maps formatting arguments to core types.
template <typename Context> struct arg_mapper {
using char_type = typename Context::char_type;
@@ -1067,15 +1230,7 @@ template <typename Context> struct arg_mapper {
return map(val.value);
}
- int map(...) {
- constexpr bool formattable = sizeof(Context) == 0;
- static_assert(
- formattable,
- "Cannot format argument. To make type T formattable provide a "
- "formatter<T> specialization: "
- "https://fmt.dev/latest/api.html#formatting-user-defined-types");
- return 0;
- }
+ unformattable map(...) { return {}; }
};
// A type constant after applying arg_mapper<Context>.
@@ -1199,15 +1354,25 @@ FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg(
return vis(monostate());
}
-// Checks whether T is a container with contiguous storage.
-template <typename T> struct is_contiguous : std::false_type {};
-template <typename Char>
-struct is_contiguous<std::basic_string<Char>> : std::true_type {};
-template <typename Char>
-struct is_contiguous<detail::buffer<Char>> : std::true_type {};
+template <typename T> struct formattable : std::false_type {};
namespace detail {
+// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
+template <typename... Ts> struct void_t_impl { using type = void; };
+template <typename... Ts>
+using void_t = typename detail::void_t_impl<Ts...>::type;
+
+template <typename It, typename T, typename Enable = void>
+struct is_output_iterator : std::false_type {};
+
+template <typename It, typename T>
+struct is_output_iterator<
+ It, T,
+ void_t<typename std::iterator_traits<It>::iterator_category,
+ decltype(*std::declval<It>() = std::declval<T>())>>
+ : std::true_type {};
+
template <typename OutputIt>
struct is_back_insert_iterator : std::false_type {};
template <typename Container>
@@ -1219,6 +1384,9 @@ struct is_contiguous_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_contiguous_back_insert_iterator<std::back_insert_iterator<Container>>
: is_contiguous<Container> {};
+template <typename Char>
+struct is_contiguous_back_insert_iterator<buffer_appender<Char>>
+ : std::true_type {};
// A type-erased reference to an std::locale to avoid heavy <locale> include.
class locale_ref {
@@ -1250,13 +1418,24 @@ FMT_CONSTEXPR basic_format_arg<Context> make_arg(const T& value) {
return arg;
}
+template <typename T> int check(unformattable) {
+ static_assert(
+ formattable<T>(),
+ "Cannot format an argument. To make type T formattable provide a "
+ "formatter<T> specialization: https://fmt.dev/latest/api.html#udt");
+ return 0;
+}
+template <typename T, typename U> inline const U& check(const U& val) {
+ return val;
+}
+
// The type template parameter is there to avoid an ODR violation when using
// a fallback formatter in one translation unit and an implicit conversion in
// another (not recommended).
template <bool IS_PACKED, typename Context, type, typename T,
FMT_ENABLE_IF(IS_PACKED)>
inline value<Context> make_arg(const T& val) {
- return arg_mapper<Context>().map(val);
+ return check<T>(arg_mapper<Context>().map(val));
}
template <bool IS_PACKED, typename Context, type, typename T,
@@ -1356,13 +1535,13 @@ template <typename OutputIt, typename Char> class basic_format_context {
template <typename Char>
using buffer_context =
- basic_format_context<std::back_insert_iterator<detail::buffer<Char>>, Char>;
+ basic_format_context<detail::buffer_appender<Char>, Char>;
using format_context = buffer_context<char>;
using wformat_context = buffer_context<wchar_t>;
-// Workaround a bug in gcc: https://stackoverflow.com/q/62767544/471164.
+// Workaround an alias issue: https://stackoverflow.com/q/62767544/471164.
#define FMT_BUFFER_CONTEXT(Char) \
- basic_format_context<std::back_insert_iterator<detail::buffer<Char>>, Char>
+ basic_format_context<detail::buffer_appender<Char>, Char>
/**
\rst
@@ -1414,7 +1593,7 @@ class format_arg_store
/**
\rst
- Constructs an `~fmt::format_arg_store` object that contains references to
+ Constructs a `~fmt::format_arg_store` object that contains references to
arguments and can be implicitly converted to `~fmt::format_args`. `Context`
can be omitted in which case it defaults to `~fmt::context`.
See `~fmt::arg` for lifetime considerations.
@@ -1428,6 +1607,27 @@ inline format_arg_store<Context, Args...> make_format_args(
/**
\rst
+ Constructs a `~fmt::format_arg_store` object that contains references
+ to arguments and can be implicitly converted to `~fmt::format_args`.
+ If ``format_str`` is a compile-time string then `make_args_checked` checks
+ its validity at compile time.
+ \endrst
+ */
+template <typename... Args, typename S, typename Char = char_t<S>>
+inline auto make_args_checked(const S& format_str,
+ const remove_reference_t<Args>&... args)
+ -> format_arg_store<buffer_context<Char>, remove_reference_t<Args>...> {
+ static_assert(
+ detail::count<(
+ std::is_base_of<detail::view, remove_reference_t<Args>>::value &&
+ std::is_reference<Args>::value)...>() == 0,
+ "passing views as lvalues is disallowed");
+ detail::check_format_string<Args...>(format_str);
+ return {args...};
+}
+
+/**
+ \rst
Returns a named argument to be used in a formatting function. It should only
be used in a call to a formatting function.
@@ -1729,7 +1929,14 @@ template <typename Context> class basic_format_args {
}
};
-/** An alias to ``basic_format_args<context>``. */
+#ifdef FMT_ARM_ABI_COMPATIBILITY
+/** An alias to ``basic_format_args<format_context>``. */
+// Separate types would result in shorter symbols but break ABI compatibility
+// between clang and gcc on ARM (#1919).
+using format_args = basic_format_args<format_context>;
+using wformat_args = basic_format_args<wformat_context>;
+#else
+// DEPRECATED! These are kept for ABI compatibility.
// It is a separate type rather than an alias to make symbols readable.
struct format_args : basic_format_args<format_context> {
template <typename... Args>
@@ -1738,31 +1945,9 @@ struct format_args : basic_format_args<format_context> {
struct wformat_args : basic_format_args<wformat_context> {
using basic_format_args::basic_format_args;
};
-
-namespace detail {
-
-// Reports a compile-time error if S is not a valid format string.
-template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)>
-FMT_INLINE void check_format_string(const S&) {
-#ifdef FMT_ENFORCE_COMPILE_STRING
- static_assert(is_compile_string<S>::value,
- "FMT_ENFORCE_COMPILE_STRING requires all format strings to use "
- "FMT_STRING.");
#endif
-}
-template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
-void check_format_string(S);
-template <typename... Args, typename S, typename Char = char_t<S>>
-inline format_arg_store<buffer_context<Char>, remove_reference_t<Args>...>
-make_args_checked(const S& format_str,
- const remove_reference_t<Args>&... args) {
- static_assert(count<(std::is_base_of<view, remove_reference_t<Args>>::value &&
- std::is_reference<Args>::value)...>() == 0,
- "passing views as lvalues is disallowed");
- check_format_string<Args...>(format_str);
- return {args...};
-}
+namespace detail {
template <typename Char, FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
std::basic_string<Char> vformat(
@@ -1772,9 +1957,10 @@ std::basic_string<Char> vformat(
FMT_API std::string vformat(string_view format_str, format_args args);
template <typename Char>
-typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to(
+void vformat_to(
buffer<Char>& buf, basic_string_view<Char> format_str,
- basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args);
+ basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args,
+ detail::locale_ref loc = {});
template <typename Char, typename Args,
FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
@@ -1789,26 +1975,80 @@ inline void vprint_mojibake(std::FILE*, string_view, format_args) {}
/** Formats a string and writes the output to ``out``. */
// GCC 8 and earlier cannot handle std::back_insert_iterator<Container> with
// vformat_to<ArgFormatter>(...) overload, so SFINAE on iterator type instead.
-template <
- typename OutputIt, typename S, typename Char = char_t<S>,
- FMT_ENABLE_IF(detail::is_contiguous_back_insert_iterator<OutputIt>::value)>
-OutputIt vformat_to(
- OutputIt out, const S& format_str,
- basic_format_args<buffer_context<type_identity_t<Char>>> args) {
- auto& c = detail::get_container(out);
- detail::container_buffer<remove_reference_t<decltype(c)>> buf(c);
+template <typename OutputIt, typename S, typename Char = char_t<S>,
+ bool enable = detail::is_output_iterator<OutputIt, Char>::value>
+auto vformat_to(OutputIt out, const S& format_str,
+ basic_format_args<buffer_context<type_identity_t<Char>>> args)
+ -> typename std::enable_if<enable, OutputIt>::type {
+ decltype(detail::get_buffer<Char>(out)) buf(detail::get_buffer_init(out));
detail::vformat_to(buf, to_string_view(format_str), args);
- return out;
+ return detail::get_iterator(buf);
+}
+
+/**
+ \rst
+ Formats arguments, writes the result to the output iterator ``out`` and returns
+ the iterator past the end of the output range.
+
+ **Example**::
+
+ std::vector<char> out;
+ fmt::format_to(std::back_inserter(out), "{}", 42);
+ \endrst
+ */
+// We cannot use FMT_ENABLE_IF because of a bug in gcc 8.3.
+template <typename OutputIt, typename S, typename... Args,
+ bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value>
+inline auto format_to(OutputIt out, const S& format_str, Args&&... args) ->
+ typename std::enable_if<enable, OutputIt>::type {
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ return vformat_to(out, to_string_view(format_str), vargs);
+}
+
+template <typename OutputIt> struct format_to_n_result {
+ /** Iterator past the end of the output range. */
+ OutputIt out;
+ /** Total (not truncated) output size. */
+ size_t size;
+};
+
+template <typename OutputIt, typename Char, typename... Args,
+ FMT_ENABLE_IF(detail::is_output_iterator<OutputIt, Char>::value)>
+inline format_to_n_result<OutputIt> vformat_to_n(
+ OutputIt out, size_t n, basic_string_view<Char> format_str,
+ basic_format_args<buffer_context<type_identity_t<Char>>> args) {
+ detail::iterator_buffer<OutputIt, Char, detail::fixed_buffer_traits> buf(out,
+ n);
+ detail::vformat_to(buf, format_str, args);
+ return {buf.out(), buf.count()};
+}
+
+/**
+ \rst
+ Formats arguments, writes up to ``n`` characters of the result to the output
+ iterator ``out`` and returns the total output size and the iterator past the
+ end of the output range.
+ \endrst
+ */
+template <typename OutputIt, typename S, typename... Args,
+ bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value>
+inline auto format_to_n(OutputIt out, size_t n, const S& format_str,
+ const Args&... args) ->
+ typename std::enable_if<enable, format_to_n_result<OutputIt>>::type {
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ return vformat_to_n(out, n, to_string_view(format_str), vargs);
}
-template <typename Container, typename S, typename... Args,
- FMT_ENABLE_IF(
- is_contiguous<Container>::value&& detail::is_string<S>::value)>
-inline std::back_insert_iterator<Container> format_to(
- std::back_insert_iterator<Container> out, const S& format_str,
- Args&&... args) {
- return vformat_to(out, to_string_view(format_str),
- detail::make_args_checked<Args...>(format_str, args...));
+/**
+ Returns the number of characters in the output of
+ ``format(format_str, args...)``.
+ */
+template <typename... Args>
+inline size_t formatted_size(string_view format_str, Args&&... args) {
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ detail::counting_buffer<> buf;
+ detail::vformat_to(buf, format_str, vargs);
+ return buf.count();
}
template <typename S, typename Char = char_t<S>>
@@ -1832,7 +2072,7 @@ FMT_INLINE std::basic_string<Char> vformat(
// std::basic_string<char_t<S>> to reduce the symbol size.
template <typename S, typename... Args, typename Char = char_t<S>>
FMT_INLINE std::basic_string<Char> format(const S& format_str, Args&&... args) {
- const auto& vargs = detail::make_args_checked<Args...>(format_str, args...);
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
return detail::vformat(to_string_view(format_str), vargs);
}
@@ -1852,7 +2092,7 @@ FMT_API void vprint(std::FILE*, string_view, format_args);
*/
template <typename S, typename... Args, typename Char = char_t<S>>
inline void print(std::FILE* f, const S& format_str, Args&&... args) {
- const auto& vargs = detail::make_args_checked<Args...>(format_str, args...);
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
return detail::is_unicode<Char>()
? vprint(f, to_string_view(format_str), vargs)
: detail::vprint_mojibake(f, to_string_view(format_str), vargs);
@@ -1871,7 +2111,7 @@ inline void print(std::FILE* f, const S& format_str, Args&&... args) {
*/
template <typename S, typename... Args, typename Char = char_t<S>>
inline void print(const S& format_str, Args&&... args) {
- const auto& vargs = detail::make_args_checked<Args...>(format_str, args...);
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
return detail::is_unicode<Char>()
? vprint(to_string_view(format_str), vargs)
: detail::vprint_mojibake(stdout, to_string_view(format_str),
diff --git a/src/third_party/fmt/format-inl.h b/src/third_party/fmt/format-inl.h
index d8c9c8a..8f2fe73 100644
--- a/src/third_party/fmt/format-inl.h
+++ b/src/third_party/fmt/format-inl.h
@@ -13,32 +13,19 @@
#include <climits>
#include <cmath>
#include <cstdarg>
-#include <cstring> // for std::memmove
+#include <cstring> // std::memmove
#include <cwchar>
#include <exception>
-#include "format.h"
-#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
+#ifndef FMT_STATIC_THOUSANDS_SEPARATOR
# include <locale>
#endif
#ifdef _WIN32
-# if !defined(NOMINMAX) && !defined(WIN32_LEAN_AND_MEAN)
-# define NOMINMAX
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-# undef WIN32_LEAN_AND_MEAN
-# undef NOMINMAX
-# else
-# include <windows.h>
-# endif
-# include <io.h>
+# include <io.h> // _isatty
#endif
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable : 4702) // unreachable code
-#endif
+#include "format.h"
// Dummy implementations of strerror_r and strerror_s called if corresponding
// system functions are not available.
@@ -79,8 +66,8 @@ inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) {
// ERANGE - buffer is not large enough to store the error message
// other - failure
// Buffer should be at least of size 1.
-FMT_FUNC int safe_strerror(int error_code, char*& buffer,
- size_t buffer_size) FMT_NOEXCEPT {
+inline int safe_strerror(int error_code, char*& buffer,
+ size_t buffer_size) FMT_NOEXCEPT {
FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer");
class dispatcher {
@@ -145,7 +132,7 @@ FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code,
// Report error code making sure that the output fits into
// inline_buffer_size to avoid dynamic memory allocation and potential
// bad_alloc.
- out.resize(0);
+ out.try_resize(0);
static const char SEP[] = ": ";
static const char ERROR_STR[] = "error ";
// Subtract 2 to account for terminating null characters in SEP and ERROR_STR.
@@ -156,7 +143,7 @@ FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code,
++error_code_size;
}
error_code_size += detail::to_unsigned(detail::count_digits(abs_value));
- auto it = std::back_inserter(out);
+ auto it = buffer_appender<char>(out);
if (message.size() <= inline_buffer_size - error_code_size)
format_to(it, "{}{}", message, SEP);
format_to(it, "{}{}", ERROR_STR, error_code);
@@ -173,8 +160,8 @@ FMT_FUNC void report_error(format_func func, int error_code,
}
// A wrapper around fwrite that throws on error.
-FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count,
- FILE* stream) {
+inline void fwrite_fully(const void* ptr, size_t size, size_t count,
+ FILE* stream) {
size_t written = std::fwrite(ptr, size, count, stream);
if (written < count) FMT_THROW(system_error(errno, "cannot write to file"));
}
@@ -242,26 +229,23 @@ template <> FMT_FUNC int count_digits<4>(detail::fallback_uintptr n) {
template <typename T>
const typename basic_data<T>::digit_pair basic_data<T>::digits[] = {
- {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
- {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
- {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
- {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
- {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
- {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
- {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
- {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
- {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
- {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
- {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
- {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
- {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
- {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
- {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
- {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
- {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
- {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
- {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
- {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
+ {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
+ {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
+ {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
+ {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
+ {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
+ {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
+ {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
+ {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
+ {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
+ {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
+ {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
+ {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
+ {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
+ {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
+ {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
+ {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
+ {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
template <typename T>
const char basic_data<T>::hex_digits[] = "0123456789abcdef";
@@ -279,16 +263,24 @@ const uint64_t basic_data<T>::powers_of_10_64[] = {
template <typename T>
const uint32_t basic_data<T>::zero_or_powers_of_10_32[] = {0,
FMT_POWERS_OF_10(1)};
-
template <typename T>
const uint64_t basic_data<T>::zero_or_powers_of_10_64[] = {
0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL),
10000000000000000000ULL};
+template <typename T>
+const uint32_t basic_data<T>::zero_or_powers_of_10_32_new[] = {
+ 0, 0, FMT_POWERS_OF_10(1)};
+
+template <typename T>
+const uint64_t basic_data<T>::zero_or_powers_of_10_64_new[] = {
+ 0, 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL),
+ 10000000000000000000ULL};
+
// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340.
// These are generated by support/compute-powers.py.
template <typename T>
-const uint64_t basic_data<T>::pow10_significands[] = {
+const uint64_t basic_data<T>::grisu_pow10_significands[] = {
0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76,
0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df,
0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c,
@@ -323,7 +315,7 @@ const uint64_t basic_data<T>::pow10_significands[] = {
// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding
// to significands above.
template <typename T>
-const int16_t basic_data<T>::pow10_exponents[] = {
+const int16_t basic_data<T>::grisu_pow10_exponents[] = {
-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954,
-927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661,
-635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369,
@@ -334,6 +326,744 @@ const int16_t basic_data<T>::pow10_exponents[] = {
827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
template <typename T>
+const divtest_table_entry<uint32_t> basic_data<T>::divtest_table_for_pow5_32[] =
+ {{0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333},
+ {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba},
+ {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5},
+ {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf},
+ {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897},
+ {0x3ed61f49, 0x000001b7}};
+
+template <typename T>
+const divtest_table_entry<uint64_t> basic_data<T>::divtest_table_for_pow5_64[] =
+ {{0x0000000000000001, 0xffffffffffffffff},
+ {0xcccccccccccccccd, 0x3333333333333333},
+ {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70},
+ {0x1cac083126e978d5, 0x020c49ba5e353f7c},
+ {0xd288ce703afb7e91, 0x0068db8bac710cb2},
+ {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0},
+ {0x790fb65668c26139, 0x000431bde82d7b63},
+ {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a},
+ {0xc767074b22e90e21, 0x00002af31dc46118},
+ {0x8e47ce423a2e9c6d, 0x0000089705f4136b},
+ {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b},
+ {0x0fee64690c913975, 0x00000057f5ff85e5},
+ {0x3662e0e1cf503eb1, 0x000000119799812d},
+ {0xa47a2cf9f6433fbd, 0x0000000384b84d09},
+ {0x54186f653140a659, 0x00000000b424dc35},
+ {0x7738164770402145, 0x0000000024075f3d},
+ {0xe4a4d1417cd9a041, 0x000000000734aca5},
+ {0xc75429d9e5c5200d, 0x000000000170ef54},
+ {0xc1773b91fac10669, 0x000000000049c977},
+ {0x26b172506559ce15, 0x00000000000ec1e4},
+ {0xd489e3a9addec2d1, 0x000000000002f394},
+ {0x90e860bb892c8d5d, 0x000000000000971d},
+ {0x502e79bf1b6f4f79, 0x0000000000001e39},
+ {0xdcd618596be30fe5, 0x000000000000060b}};
+
+template <typename T>
+const uint64_t basic_data<T>::dragonbox_pow10_significands_64[] = {
+ 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f,
+ 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb,
+ 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28,
+ 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb,
+ 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a,
+ 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810,
+ 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff,
+ 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd,
+ 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424,
+ 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b,
+ 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000,
+ 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000,
+ 0xc350000000000000, 0xf424000000000000, 0x9896800000000000,
+ 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000,
+ 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000,
+ 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000,
+ 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000,
+ 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000,
+ 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0,
+ 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984,
+ 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296,
+ 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6,
+ 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 0xf0bdc21abb48db20,
+ 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd,
+ 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719,
+ 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e};
+
+template <typename T>
+const uint128_wrapper basic_data<T>::dragonbox_pow10_significands_128[] = {
+#if FMT_USE_FULL_CACHE_DRAGONBOX
+ {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
+ {0x9faacf3df73609b1, 0x77b191618c54e9ad},
+ {0xc795830d75038c1d, 0xd59df5b9ef6a2418},
+ {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e},
+ {0x9becce62836ac577, 0x4ee367f9430aec33},
+ {0xc2e801fb244576d5, 0x229c41f793cda740},
+ {0xf3a20279ed56d48a, 0x6b43527578c11110},
+ {0x9845418c345644d6, 0x830a13896b78aaaa},
+ {0xbe5691ef416bd60c, 0x23cc986bc656d554},
+ {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9},
+ {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa},
+ {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54},
+ {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69},
+ {0x91376c36d99995be, 0x23100809b9c21fa2},
+ {0xb58547448ffffb2d, 0xabd40a0c2832a78b},
+ {0xe2e69915b3fff9f9, 0x16c90c8f323f516d},
+ {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4},
+ {0xb1442798f49ffb4a, 0x99cd11cfdf41779d},
+ {0xdd95317f31c7fa1d, 0x40405643d711d584},
+ {0x8a7d3eef7f1cfc52, 0x482835ea666b2573},
+ {0xad1c8eab5ee43b66, 0xda3243650005eed0},
+ {0xd863b256369d4a40, 0x90bed43e40076a83},
+ {0x873e4f75e2224e68, 0x5a7744a6e804a292},
+ {0xa90de3535aaae202, 0x711515d0a205cb37},
+ {0xd3515c2831559a83, 0x0d5a5b44ca873e04},
+ {0x8412d9991ed58091, 0xe858790afe9486c3},
+ {0xa5178fff668ae0b6, 0x626e974dbe39a873},
+ {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
+ {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a},
+ {0xa139029f6a239f72, 0x1c1fffc1ebc44e81},
+ {0xc987434744ac874e, 0xa327ffb266b56221},
+ {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9},
+ {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa},
+ {0xc4ce17b399107c22, 0xcb550fb4384d21d4},
+ {0xf6019da07f549b2b, 0x7e2a53a146606a49},
+ {0x99c102844f94e0fb, 0x2eda7444cbfc426e},
+ {0xc0314325637a1939, 0xfa911155fefb5309},
+ {0xf03d93eebc589f88, 0x793555ab7eba27cb},
+ {0x96267c7535b763b5, 0x4bc1558b2f3458df},
+ {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17},
+ {0xea9c227723ee8bcb, 0x465e15a979c1cadd},
+ {0x92a1958a7675175f, 0x0bfacd89ec191eca},
+ {0xb749faed14125d36, 0xcef980ec671f667c},
+ {0xe51c79a85916f484, 0x82b7e12780e7401b},
+ {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811},
+ {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16},
+ {0xdfbdcece67006ac9, 0x67a791e093e1d49b},
+ {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1},
+ {0xaecc49914078536d, 0x58fae9f773886e19},
+ {0xda7f5bf590966848, 0xaf39a475506a899f},
+ {0x888f99797a5e012d, 0x6d8406c952429604},
+ {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84},
+ {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65},
+ {0x855c3be0a17fcd26, 0x5cf2eea09a550680},
+ {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f},
+ {0xd0601d8efc57b08b, 0xf13b94daf124da27},
+ {0x823c12795db6ce57, 0x76c53d08d6b70859},
+ {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f},
+ {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a},
+ {0xfe5d54150b090b02, 0xd3f93b35435d7c4d},
+ {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0},
+ {0xc6b8e9b0709f109a, 0x359ab6419ca1091c},
+ {0xf867241c8cc6d4c0, 0xc30163d203c94b63},
+ {0x9b407691d7fc44f8, 0x79e0de63425dcf1e},
+ {0xc21094364dfb5636, 0x985915fc12f542e5},
+ {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e},
+ {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43},
+ {0xbd8430bd08277231, 0x50c6ff782a838354},
+ {0xece53cec4a314ebd, 0xa4f8bf5635246429},
+ {0x940f4613ae5ed136, 0x871b7795e136be9a},
+ {0xb913179899f68584, 0x28e2557b59846e40},
+ {0xe757dd7ec07426e5, 0x331aeada2fe589d0},
+ {0x9096ea6f3848984f, 0x3ff0d2c85def7622},
+ {0xb4bca50b065abe63, 0x0fed077a756b53aa},
+ {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895},
+ {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d},
+ {0xb080392cc4349dec, 0xbd8d794d96aacfb4},
+ {0xdca04777f541c567, 0xecf0d7a0fc5583a1},
+ {0x89e42caaf9491b60, 0xf41686c49db57245},
+ {0xac5d37d5b79b6239, 0x311c2875c522ced6},
+ {0xd77485cb25823ac7, 0x7d633293366b828c},
+ {0x86a8d39ef77164bc, 0xae5dff9c02033198},
+ {0xa8530886b54dbdeb, 0xd9f57f830283fdfd},
+ {0xd267caa862a12d66, 0xd072df63c324fd7c},
+ {0x8380dea93da4bc60, 0x4247cb9e59f71e6e},
+ {0xa46116538d0deb78, 0x52d9be85f074e609},
+ {0xcd795be870516656, 0x67902e276c921f8c},
+ {0x806bd9714632dff6, 0x00ba1cd8a3db53b7},
+ {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5},
+ {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce},
+ {0xfad2a4b13d1b5d6c, 0x796b805720085f82},
+ {0x9cc3a6eec6311a63, 0xcbe3303674053bb1},
+ {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d},
+ {0xf4f1b4d515acb93b, 0xee92fb5515482d45},
+ {0x991711052d8bf3c5, 0x751bdd152d4d1c4b},
+ {0xbf5cd54678eef0b6, 0xd262d45a78a0635e},
+ {0xef340a98172aace4, 0x86fb897116c87c35},
+ {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1},
+ {0xbae0a846d2195712, 0x8974836059cca10a},
+ {0xe998d258869facd7, 0x2bd1a438703fc94c},
+ {0x91ff83775423cc06, 0x7b6306a34627ddd0},
+ {0xb67f6455292cbf08, 0x1a3bc84c17b1d543},
+ {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94},
+ {0x8e938662882af53e, 0x547eb47b7282ee9d},
+ {0xb23867fb2a35b28d, 0xe99e619a4f23aa44},
+ {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5},
+ {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05},
+ {0xae0b158b4738705e, 0x9624ab50b148d446},
+ {0xd98ddaee19068c76, 0x3badd624dd9b0958},
+ {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7},
+ {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d},
+ {0xd47487cc8470652b, 0x7647c32000696720},
+ {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074},
+ {0xa5fb0a17c777cf09, 0xf468107100525891},
+ {0xcf79cc9db955c2cc, 0x7182148d4066eeb5},
+ {0x81ac1fe293d599bf, 0xc6f14cd848405531},
+ {0xa21727db38cb002f, 0xb8ada00e5a506a7d},
+ {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d},
+ {0xfd442e4688bd304a, 0x908f4a166d1da664},
+ {0x9e4a9cec15763e2e, 0x9a598e4e043287ff},
+ {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe},
+ {0xf7549530e188c128, 0xd12bee59e68ef47d},
+ {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf},
+ {0xc13a148e3032d6e7, 0xe36a52363c1faf02},
+ {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2},
+ {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba},
+ {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8},
+ {0xebdf661791d60f56, 0x111b495b3464ad22},
+ {0x936b9fcebb25c995, 0xcab10dd900beec35},
+ {0xb84687c269ef3bfb, 0x3d5d514f40eea743},
+ {0xe65829b3046b0afa, 0x0cb4a5a3112a5113},
+ {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac},
+ {0xb3f4e093db73a093, 0x59ed216765690f57},
+ {0xe0f218b8d25088b8, 0x306869c13ec3532d},
+ {0x8c974f7383725573, 0x1e414218c73a13fc},
+ {0xafbd2350644eeacf, 0xe5d1929ef90898fb},
+ {0xdbac6c247d62a583, 0xdf45f746b74abf3a},
+ {0x894bc396ce5da772, 0x6b8bba8c328eb784},
+ {0xab9eb47c81f5114f, 0x066ea92f3f326565},
+ {0xd686619ba27255a2, 0xc80a537b0efefebe},
+ {0x8613fd0145877585, 0xbd06742ce95f5f37},
+ {0xa798fc4196e952e7, 0x2c48113823b73705},
+ {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6},
+ {0x82ef85133de648c4, 0x9a984d73dbe722fc},
+ {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb},
+ {0xcc963fee10b7d1b3, 0x318df905079926a9},
+ {0xffbbcfe994e5c61f, 0xfdf17746497f7053},
+ {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634},
+ {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1},
+ {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1},
+ {0x9c1661a651213e2d, 0x06bea10ca65c084f},
+ {0xc31bfa0fe5698db8, 0x486e494fcff30a63},
+ {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb},
+ {0x986ddb5c6b3a76b7, 0xf89629465a75e01d},
+ {0xbe89523386091465, 0xf6bbb397f1135824},
+ {0xee2ba6c0678b597f, 0x746aa07ded582e2d},
+ {0x94db483840b717ef, 0xa8c2a44eb4571cdd},
+ {0xba121a4650e4ddeb, 0x92f34d62616ce414},
+ {0xe896a0d7e51e1566, 0x77b020baf9c81d18},
+ {0x915e2486ef32cd60, 0x0ace1474dc1d122f},
+ {0xb5b5ada8aaff80b8, 0x0d819992132456bb},
+ {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a},
+ {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2},
+ {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3},
+ {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf},
+ {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c},
+ {0xad4ab7112eb3929d, 0x86c16c98d2c953c7},
+ {0xd89d64d57a607744, 0xe871c7bf077ba8b8},
+ {0x87625f056c7c4a8b, 0x11471cd764ad4973},
+ {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0},
+ {0xd389b47879823479, 0x4aff1d108d4ec2c4},
+ {0x843610cb4bf160cb, 0xcedf722a585139bb},
+ {0xa54394fe1eedb8fe, 0xc2974eb4ee658829},
+ {0xce947a3da6a9273e, 0x733d226229feea33},
+ {0x811ccc668829b887, 0x0806357d5a3f5260},
+ {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8},
+ {0xc9bcff6034c13052, 0xfc89b393dd02f0b6},
+ {0xfc2c3f3841f17c67, 0xbbac2078d443ace3},
+ {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e},
+ {0xc5029163f384a931, 0x0a9e795e65d4df12},
+ {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6},
+ {0x99ea0196163fa42e, 0x504bced1bf8e4e46},
+ {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7},
+ {0xf07da27a82c37088, 0x5d767327bb4e5a4d},
+ {0x964e858c91ba2655, 0x3a6a07f8d510f870},
+ {0xbbe226efb628afea, 0x890489f70a55368c},
+ {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f},
+ {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e},
+ {0xb77ada0617e3bbcb, 0x09ce6ebb40173745},
+ {0xe55990879ddcaabd, 0xcc420a6a101d0516},
+ {0x8f57fa54c2a9eab6, 0x9fa946824a12232e},
+ {0xb32df8e9f3546564, 0x47939822dc96abfa},
+ {0xdff9772470297ebd, 0x59787e2b93bc56f8},
+ {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b},
+ {0xaefae51477a06b03, 0xede622920b6b23f2},
+ {0xdab99e59958885c4, 0xe95fab368e45ecee},
+ {0x88b402f7fd75539b, 0x11dbcb0218ebb415},
+ {0xaae103b5fcd2a881, 0xd652bdc29f26a11a},
+ {0xd59944a37c0752a2, 0x4be76d3346f04960},
+ {0x857fcae62d8493a5, 0x6f70a4400c562ddc},
+ {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953},
+ {0xd097ad07a71f26b2, 0x7e2000a41346a7a8},
+ {0x825ecc24c873782f, 0x8ed400668c0c28c9},
+ {0xa2f67f2dfa90563b, 0x728900802f0f32fb},
+ {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba},
+ {0xfea126b7d78186bc, 0xe2f610c84987bfa9},
+ {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca},
+ {0xc6ede63fa05d3143, 0x91503d1c79720dbc},
+ {0xf8a95fcf88747d94, 0x75a44c6397ce912b},
+ {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb},
+ {0xc24452da229b021b, 0xfbe85badce996169},
+ {0xf2d56790ab41c2a2, 0xfae27299423fb9c4},
+ {0x97c560ba6b0919a5, 0xdccd879fc967d41b},
+ {0xbdb6b8e905cb600f, 0x5400e987bbc1c921},
+ {0xed246723473e3813, 0x290123e9aab23b69},
+ {0x9436c0760c86e30b, 0xf9a0b6720aaf6522},
+ {0xb94470938fa89bce, 0xf808e40e8d5b3e6a},
+ {0xe7958cb87392c2c2, 0xb60b1d1230b20e05},
+ {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3},
+ {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4},
+ {0xe2280b6c20dd5232, 0x25c6da63c38de1b1},
+ {0x8d590723948a535f, 0x579c487e5a38ad0f},
+ {0xb0af48ec79ace837, 0x2d835a9df0c6d852},
+ {0xdcdb1b2798182244, 0xf8e431456cf88e66},
+ {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900},
+ {0xac8b2d36eed2dac5, 0xe272467e3d222f40},
+ {0xd7adf884aa879177, 0x5b0ed81dcc6abb10},
+ {0x86ccbb52ea94baea, 0x98e947129fc2b4ea},
+ {0xa87fea27a539e9a5, 0x3f2398d747b36225},
+ {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae},
+ {0x83a3eeeef9153e89, 0x1953cf68300424ad},
+ {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8},
+ {0xcdb02555653131b6, 0x3792f412cb06794e},
+ {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1},
+ {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5},
+ {0xc8de047564d20a8b, 0xf245825a5a445276},
+ {0xfb158592be068d2e, 0xeed6e2f0f0d56713},
+ {0x9ced737bb6c4183d, 0x55464dd69685606c},
+ {0xc428d05aa4751e4c, 0xaa97e14c3c26b887},
+ {0xf53304714d9265df, 0xd53dd99f4b3066a9},
+ {0x993fe2c6d07b7fab, 0xe546a8038efe402a},
+ {0xbf8fdb78849a5f96, 0xde98520472bdd034},
+ {0xef73d256a5c0f77c, 0x963e66858f6d4441},
+ {0x95a8637627989aad, 0xdde7001379a44aa9},
+ {0xbb127c53b17ec159, 0x5560c018580d5d53},
+ {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7},
+ {0x9226712162ab070d, 0xcab3961304ca70e9},
+ {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23},
+ {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b},
+ {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243},
+ {0xb267ed1940f1c61c, 0x55f038b237591ed4},
+ {0xdf01e85f912e37a3, 0x6b6c46dec52f6689},
+ {0x8b61313bbabce2c6, 0x2323ac4b3b3da016},
+ {0xae397d8aa96c1b77, 0xabec975e0a0d081b},
+ {0xd9c7dced53c72255, 0x96e7bd358c904a22},
+ {0x881cea14545c7575, 0x7e50d64177da2e55},
+ {0xaa242499697392d2, 0xdde50bd1d5d0b9ea},
+ {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865},
+ {0x84ec3c97da624ab4, 0xbd5af13bef0b113f},
+ {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f},
+ {0xcfb11ead453994ba, 0x67de18eda5814af3},
+ {0x81ceb32c4b43fcf4, 0x80eacf948770ced8},
+ {0xa2425ff75e14fc31, 0xa1258379a94d028e},
+ {0xcad2f7f5359a3b3e, 0x096ee45813a04331},
+ {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd},
+ {0x9e74d1b791e07e48, 0x775ea264cf55347e},
+ {0xc612062576589dda, 0x95364afe032a819e},
+ {0xf79687aed3eec551, 0x3a83ddbd83f52205},
+ {0x9abe14cd44753b52, 0xc4926a9672793543},
+ {0xc16d9a0095928a27, 0x75b7053c0f178294},
+ {0xf1c90080baf72cb1, 0x5324c68b12dd6339},
+ {0x971da05074da7bee, 0xd3f6fc16ebca5e04},
+ {0xbce5086492111aea, 0x88f4bb1ca6bcf585},
+ {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6},
+ {0x9392ee8e921d5d07, 0x3aff322e62439fd0},
+ {0xb877aa3236a4b449, 0x09befeb9fad487c3},
+ {0xe69594bec44de15b, 0x4c2ebe687989a9b4},
+ {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11},
+ {0xb424dc35095cd80f, 0x538484c19ef38c95},
+ {0xe12e13424bb40e13, 0x2865a5f206b06fba},
+ {0x8cbccc096f5088cb, 0xf93f87b7442e45d4},
+ {0xafebff0bcb24aafe, 0xf78f69a51539d749},
+ {0xdbe6fecebdedd5be, 0xb573440e5a884d1c},
+ {0x89705f4136b4a597, 0x31680a88f8953031},
+ {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e},
+ {0xd6bf94d5e57a42bc, 0x3d32907604691b4d},
+ {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110},
+ {0xa7c5ac471b478423, 0x0fcf80dc33721d54},
+ {0xd1b71758e219652b, 0xd3c36113404ea4a9},
+ {0x83126e978d4fdf3b, 0x645a1cac083126ea},
+ {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4},
+ {0xcccccccccccccccc, 0xcccccccccccccccd},
+ {0x8000000000000000, 0x0000000000000000},
+ {0xa000000000000000, 0x0000000000000000},
+ {0xc800000000000000, 0x0000000000000000},
+ {0xfa00000000000000, 0x0000000000000000},
+ {0x9c40000000000000, 0x0000000000000000},
+ {0xc350000000000000, 0x0000000000000000},
+ {0xf424000000000000, 0x0000000000000000},
+ {0x9896800000000000, 0x0000000000000000},
+ {0xbebc200000000000, 0x0000000000000000},
+ {0xee6b280000000000, 0x0000000000000000},
+ {0x9502f90000000000, 0x0000000000000000},
+ {0xba43b74000000000, 0x0000000000000000},
+ {0xe8d4a51000000000, 0x0000000000000000},
+ {0x9184e72a00000000, 0x0000000000000000},
+ {0xb5e620f480000000, 0x0000000000000000},
+ {0xe35fa931a0000000, 0x0000000000000000},
+ {0x8e1bc9bf04000000, 0x0000000000000000},
+ {0xb1a2bc2ec5000000, 0x0000000000000000},
+ {0xde0b6b3a76400000, 0x0000000000000000},
+ {0x8ac7230489e80000, 0x0000000000000000},
+ {0xad78ebc5ac620000, 0x0000000000000000},
+ {0xd8d726b7177a8000, 0x0000000000000000},
+ {0x878678326eac9000, 0x0000000000000000},
+ {0xa968163f0a57b400, 0x0000000000000000},
+ {0xd3c21bcecceda100, 0x0000000000000000},
+ {0x84595161401484a0, 0x0000000000000000},
+ {0xa56fa5b99019a5c8, 0x0000000000000000},
+ {0xcecb8f27f4200f3a, 0x0000000000000000},
+ {0x813f3978f8940984, 0x4000000000000000},
+ {0xa18f07d736b90be5, 0x5000000000000000},
+ {0xc9f2c9cd04674ede, 0xa400000000000000},
+ {0xfc6f7c4045812296, 0x4d00000000000000},
+ {0x9dc5ada82b70b59d, 0xf020000000000000},
+ {0xc5371912364ce305, 0x6c28000000000000},
+ {0xf684df56c3e01bc6, 0xc732000000000000},
+ {0x9a130b963a6c115c, 0x3c7f400000000000},
+ {0xc097ce7bc90715b3, 0x4b9f100000000000},
+ {0xf0bdc21abb48db20, 0x1e86d40000000000},
+ {0x96769950b50d88f4, 0x1314448000000000},
+ {0xbc143fa4e250eb31, 0x17d955a000000000},
+ {0xeb194f8e1ae525fd, 0x5dcfab0800000000},
+ {0x92efd1b8d0cf37be, 0x5aa1cae500000000},
+ {0xb7abc627050305ad, 0xf14a3d9e40000000},
+ {0xe596b7b0c643c719, 0x6d9ccd05d0000000},
+ {0x8f7e32ce7bea5c6f, 0xe4820023a2000000},
+ {0xb35dbf821ae4f38b, 0xdda2802c8a800000},
+ {0xe0352f62a19e306e, 0xd50b2037ad200000},
+ {0x8c213d9da502de45, 0x4526f422cc340000},
+ {0xaf298d050e4395d6, 0x9670b12b7f410000},
+ {0xdaf3f04651d47b4c, 0x3c0cdd765f114000},
+ {0x88d8762bf324cd0f, 0xa5880a69fb6ac800},
+ {0xab0e93b6efee0053, 0x8eea0d047a457a00},
+ {0xd5d238a4abe98068, 0x72a4904598d6d880},
+ {0x85a36366eb71f041, 0x47a6da2b7f864750},
+ {0xa70c3c40a64e6c51, 0x999090b65f67d924},
+ {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d},
+ {0x82818f1281ed449f, 0xbff8f10e7a8921a4},
+ {0xa321f2d7226895c7, 0xaff72d52192b6a0d},
+ {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490},
+ {0xfee50b7025c36a08, 0x02f236d04753d5b4},
+ {0x9f4f2726179a2245, 0x01d762422c946590},
+ {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5},
+ {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2},
+ {0x9b934c3b330c8577, 0x63cc55f49f88eb2f},
+ {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb},
+ {0xf316271c7fc3908a, 0x8bef464e3945ef7a},
+ {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac},
+ {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317},
+ {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd},
+ {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a},
+ {0xb975d6b6ee39e436, 0xb3e2fd538e122b44},
+ {0xe7d34c64a9c85d44, 0x60dbbca87196b616},
+ {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd},
+ {0xb51d13aea4a488dd, 0x6babab6398bdbe41},
+ {0xe264589a4dcdab14, 0xc696963c7eed2dd1},
+ {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2},
+ {0xb0de65388cc8ada8, 0x3b25a55f43294bcb},
+ {0xdd15fe86affad912, 0x49ef0eb713f39ebe},
+ {0x8a2dbf142dfcc7ab, 0x6e3569326c784337},
+ {0xacb92ed9397bf996, 0x49c2c37f07965404},
+ {0xd7e77a8f87daf7fb, 0xdc33745ec97be906},
+ {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3},
+ {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c},
+ {0xd2d80db02aabd62b, 0xf50a3fa490c30190},
+ {0x83c7088e1aab65db, 0x792667c6da79e0fa},
+ {0xa4b8cab1a1563f52, 0x577001b891185938},
+ {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86},
+ {0x80b05e5ac60b6178, 0x544f8158315b05b4},
+ {0xa0dc75f1778e39d6, 0x696361ae3db1c721},
+ {0xc913936dd571c84c, 0x03bc3a19cd1e38e9},
+ {0xfb5878494ace3a5f, 0x04ab48a04065c723},
+ {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76},
+ {0xc45d1df942711d9a, 0x3ba5d0bd324f8394},
+ {0xf5746577930d6500, 0xca8f44ec7ee36479},
+ {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb},
+ {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e},
+ {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e},
+ {0x95d04aee3b80ece5, 0xbba1f1d158724a12},
+ {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97},
+ {0xea1575143cf97226, 0xf52d09d71a3293bd},
+ {0x924d692ca61be758, 0x593c2626705f9c56},
+ {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c},
+ {0xe498f455c38b997a, 0x0b6dfb9c0f956447},
+ {0x8edf98b59a373fec, 0x4724bd4189bd5eac},
+ {0xb2977ee300c50fe7, 0x58edec91ec2cb657},
+ {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed},
+ {0x8b865b215899f46c, 0xbd79e0d20082ee74},
+ {0xae67f1e9aec07187, 0xecd8590680a3aa11},
+ {0xda01ee641a708de9, 0xe80e6f4820cc9495},
+ {0x884134fe908658b2, 0x3109058d147fdcdd},
+ {0xaa51823e34a7eede, 0xbd4b46f0599fd415},
+ {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a},
+ {0x850fadc09923329e, 0x03e2cf6bc604ddb0},
+ {0xa6539930bf6bff45, 0x84db8346b786151c},
+ {0xcfe87f7cef46ff16, 0xe612641865679a63},
+ {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e},
+ {0xa26da3999aef7749, 0xe3be5e330f38f09d},
+ {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5},
+ {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6},
+ {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa},
+ {0xc646d63501a1511d, 0xb281e1fd541501b8},
+ {0xf7d88bc24209a565, 0x1f225a7ca91a4226},
+ {0x9ae757596946075f, 0x3375788de9b06958},
+ {0xc1a12d2fc3978937, 0x0052d6b1641c83ae},
+ {0xf209787bb47d6b84, 0xc0678c5dbd23a49a},
+ {0x9745eb4d50ce6332, 0xf840b7ba963646e0},
+ {0xbd176620a501fbff, 0xb650e5a93bc3d898},
+ {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe},
+ {0x93ba47c980e98cdf, 0xc66f336c36b10137},
+ {0xb8a8d9bbe123f017, 0xb80b0047445d4184},
+ {0xe6d3102ad96cec1d, 0xa60dc059157491e5},
+ {0x9043ea1ac7e41392, 0x87c89837ad68db2f},
+ {0xb454e4a179dd1877, 0x29babe4598c311fb},
+ {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a},
+ {0x8ce2529e2734bb1d, 0x1899e4a65f58660c},
+ {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f},
+ {0xdc21a1171d42645d, 0x76707543f4fa1f73},
+ {0x899504ae72497eba, 0x6a06494a791c53a8},
+ {0xabfa45da0edbde69, 0x0487db9d17636892},
+ {0xd6f8d7509292d603, 0x45a9d2845d3c42b6},
+ {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2},
+ {0xa7f26836f282b732, 0x8e6cac7768d7141e},
+ {0xd1ef0244af2364ff, 0x3207d795430cd926},
+ {0x8335616aed761f1f, 0x7f44e6bd49e807b8},
+ {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6},
+ {0xcd036837130890a1, 0x36dba887c37a8c0f},
+ {0x802221226be55a64, 0xc2494954da2c9789},
+ {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c},
+ {0xc83553c5c8965d3d, 0x6f92829494e5acc7},
+ {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9},
+ {0x9c69a97284b578d7, 0xff2a760414536efb},
+ {0xc38413cf25e2d70d, 0xfef5138519684aba},
+ {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69},
+ {0x98bf2f79d5993802, 0xef2f773ffbd97a61},
+ {0xbeeefb584aff8603, 0xaafb550ffacfd8fa},
+ {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38},
+ {0x952ab45cfa97a0b2, 0xdd945a747bf26183},
+ {0xba756174393d88df, 0x94f971119aeef9e4},
+ {0xe912b9d1478ceb17, 0x7a37cd5601aab85d},
+ {0x91abb422ccb812ee, 0xac62e055c10ab33a},
+ {0xb616a12b7fe617aa, 0x577b986b314d6009},
+ {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b},
+ {0x8e41ade9fbebc27d, 0x14588f13be847307},
+ {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8},
+ {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb},
+ {0x8aec23d680043bee, 0x25de7bb9480d5854},
+ {0xada72ccc20054ae9, 0xaf561aa79a10ae6a},
+ {0xd910f7ff28069da4, 0x1b2ba1518094da04},
+ {0x87aa9aff79042286, 0x90fb44d2f05d0842},
+ {0xa99541bf57452b28, 0x353a1607ac744a53},
+ {0xd3fa922f2d1675f2, 0x42889b8997915ce8},
+ {0x847c9b5d7c2e09b7, 0x69956135febada11},
+ {0xa59bc234db398c25, 0x43fab9837e699095},
+ {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb},
+ {0x8161afb94b44f57d, 0x1d1be0eebac278f5},
+ {0xa1ba1ba79e1632dc, 0x6462d92a69731732},
+ {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe},
+ {0xfcb2cb35e702af78, 0x5cda735244c3d43e},
+ {0x9defbf01b061adab, 0x3a0888136afa64a7},
+ {0xc56baec21c7a1916, 0x088aaa1845b8fdd0},
+ {0xf6c69a72a3989f5b, 0x8aad549e57273d45},
+ {0x9a3c2087a63f6399, 0x36ac54e2f678864b},
+ {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd},
+ {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5},
+ {0x969eb7c47859e743, 0x9f644ae5a4b1b325},
+ {0xbc4665b596706114, 0x873d5d9f0dde1fee},
+ {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea},
+ {0x9316ff75dd87cbd8, 0x09a7f12442d588f2},
+ {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f},
+ {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa},
+ {0x8fa475791a569d10, 0xf96e017d694487bc},
+ {0xb38d92d760ec4455, 0x37c981dcc395a9ac},
+ {0xe070f78d3927556a, 0x85bbe253f47b1417},
+ {0x8c469ab843b89562, 0x93956d7478ccec8e},
+ {0xaf58416654a6babb, 0x387ac8d1970027b2},
+ {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e},
+ {0x88fcf317f22241e2, 0x441fece3bdf81f03},
+ {0xab3c2fddeeaad25a, 0xd527e81cad7626c3},
+ {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074},
+ {0x85c7056562757456, 0xf6872d5667844e49},
+ {0xa738c6bebb12d16c, 0xb428f8ac016561db},
+ {0xd106f86e69d785c7, 0xe13336d701beba52},
+ {0x82a45b450226b39c, 0xecc0024661173473},
+ {0xa34d721642b06084, 0x27f002d7f95d0190},
+ {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4},
+ {0xff290242c83396ce, 0x7e67047175a15271},
+ {0x9f79a169bd203e41, 0x0f0062c6e984d386},
+ {0xc75809c42c684dd1, 0x52c07b78a3e60868},
+ {0xf92e0c3537826145, 0xa7709a56ccdf8a82},
+ {0x9bbcc7a142b17ccb, 0x88a66076400bb691},
+ {0xc2abf989935ddbfe, 0x6acff893d00ea435},
+ {0xf356f7ebf83552fe, 0x0583f6b8c4124d43},
+ {0x98165af37b2153de, 0xc3727a337a8b704a},
+ {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c},
+ {0xeda2ee1c7064130c, 0x1162def06f79df73},
+ {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8},
+ {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692},
+ {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437},
+ {0x910ab1d4db9914a0, 0x1d9c9892400a22a2},
+ {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b},
+ {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d},
+ {0x8da471a9de737e24, 0x5ceaecfed289e5d2},
+ {0xb10d8e1456105dad, 0x7425a83e872c5f47},
+ {0xdd50f1996b947518, 0xd12f124e28f77719},
+ {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f},
+ {0xace73cbfdc0bfb7b, 0x636cc64d1001550b},
+ {0xd8210befd30efa5a, 0x3c47f7e05401aa4e},
+ {0x8714a775e3e95c78, 0x65acfaec34810a71},
+ {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d},
+ {0xd31045a8341ca07c, 0x1ede48111209a050},
+ {0x83ea2b892091e44d, 0x934aed0aab460432},
+ {0xa4e4b66b68b65d60, 0xf81da84d5617853f},
+ {0xce1de40642e3f4b9, 0x36251260ab9d668e},
+ {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019},
+ {0xa1075a24e4421730, 0xb24cf65b8612f81f},
+ {0xc94930ae1d529cfc, 0xdee033f26797b627},
+ {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1},
+ {0x9d412e0806e88aa5, 0x8e1f289560ee864e},
+ {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2},
+ {0xf5b5d7ec8acb58a2, 0xae10af696774b1db},
+ {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29},
+ {0xbff610b0cc6edd3f, 0x17fd090a58d32af3},
+ {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0},
+ {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e},
+ {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1},
+ {0xea53df5fd18d5513, 0x84c86189216dc5ed},
+ {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4},
+ {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1},
+ {0xe4d5e82392a40515, 0x0fabaf3feaa5334a},
+ {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e},
+ {0xb2c71d5bca9023f8, 0x743e20e9ef511012},
+ {0xdf78e4b2bd342cf6, 0x914da9246b255416},
+ {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e},
+ {0xae9672aba3d0c320, 0xa184ac2473b529b1},
+ {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e},
+ {0x8865899617fb1871, 0x7e2fa67c7a658892},
+ {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7},
+ {0xd51ea6fa85785631, 0x552a74227f3ea565},
+ {0x8533285c936b35de, 0xd53a88958f87275f},
+ {0xa67ff273b8460356, 0x8a892abaf368f137},
+ {0xd01fef10a657842c, 0x2d2b7569b0432d85},
+ {0x8213f56a67f6b29b, 0x9c3b29620e29fc73},
+ {0xa298f2c501f45f42, 0x8349f3ba91b47b8f},
+ {0xcb3f2f7642717713, 0x241c70a936219a73},
+ {0xfe0efb53d30dd4d7, 0xed238cd383aa0110},
+ {0x9ec95d1463e8a506, 0xf4363804324a40aa},
+ {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5},
+ {0xf81aa16fdc1b81da, 0xdd94b7868e94050a},
+ {0x9b10a4e5e9913128, 0xca7cf2b4191c8326},
+ {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0},
+ {0xf24a01a73cf2dccf, 0xbc633b39673c8cec},
+ {0x976e41088617ca01, 0xd5be0503e085d813},
+ {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18},
+ {0xec9c459d51852ba2, 0xddf8e7d60ed1219e},
+ {0x93e1ab8252f33b45, 0xcabb90e5c942b503},
+ {0xb8da1662e7b00a17, 0x3d6a751f3b936243},
+ {0xe7109bfba19c0c9d, 0x0cc512670a783ad4},
+ {0x906a617d450187e2, 0x27fb2b80668b24c5},
+ {0xb484f9dc9641e9da, 0xb1f9f660802dedf6},
+ {0xe1a63853bbd26451, 0x5e7873f8a0396973},
+ {0x8d07e33455637eb2, 0xdb0b487b6423e1e8},
+ {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62},
+ {0xdc5c5301c56b75f7, 0x7641a140cc7810fb},
+ {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d},
+ {0xac2820d9623bf429, 0x546345fa9fbdcd44},
+ {0xd732290fbacaf133, 0xa97c177947ad4095},
+ {0x867f59a9d4bed6c0, 0x49ed8eabcccc485d},
+ {0xa81f301449ee8c70, 0x5c68f256bfff5a74},
+ {0xd226fc195c6a2f8c, 0x73832eec6fff3111},
+ {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab},
+ {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55},
+ {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb},
+ {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3},
+ {0xa0555e361951c366, 0xd7e105bcc332621f},
+ {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7},
+ {0xfa856334878fc150, 0xb14f98f6f0feb951},
+ {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3},
+ {0xc3b8358109e84f07, 0x0a862f80ec4700c8},
+ {0xf4a642e14c6262c8, 0xcd27bb612758c0fa},
+ {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c},
+ {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3},
+ {0xeeea5d5004981478, 0x1858ccfce06cac74},
+ {0x95527a5202df0ccb, 0x0f37801e0c43ebc8},
+ {0xbaa718e68396cffd, 0xd30560258f54e6ba},
+ {0xe950df20247c83fd, 0x47c6b82ef32a2069},
+ {0x91d28b7416cdd27e, 0x4cdc331d57fa5441},
+ {0xb6472e511c81471d, 0xe0133fe4adf8e952},
+ {0xe3d8f9e563a198e5, 0x58180fddd97723a6},
+ {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648},
+ {0xb201833b35d63f73, 0x2cd2cc6551e513da},
+ {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1},
+ {0x8b112e86420f6191, 0xfb04afaf27faf782},
+ {0xadd57a27d29339f6, 0x79c5db9af1f9b563},
+ {0xd94ad8b1c7380874, 0x18375281ae7822bc},
+ {0x87cec76f1c830548, 0x8f2293910d0b15b5},
+ {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22},
+ {0xd433179d9c8cb841, 0x5fa60692a46151eb},
+ {0x849feec281d7f328, 0xdbc7c41ba6bcd333},
+ {0xa5c7ea73224deff3, 0x12b9b522906c0800},
+ {0xcf39e50feae16bef, 0xd768226b34870a00},
+ {0x81842f29f2cce375, 0xe6a1158300d46640},
+ {0xa1e53af46f801c53, 0x60495ae3c1097fd0},
+ {0xca5e89b18b602368, 0x385bb19cb14bdfc4},
+ {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5},
+ {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1},
+ {0xc5a05277621be293, 0xc7098b7305241885},
+ {0xf70867153aa2db38, 0xb8cbee4fc66d1ea7}
+#else
+ {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
+ {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
+ {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f},
+ {0x86a8d39ef77164bc, 0xae5dff9c02033198},
+ {0xd98ddaee19068c76, 0x3badd624dd9b0958},
+ {0xafbd2350644eeacf, 0xe5d1929ef90898fb},
+ {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2},
+ {0xe55990879ddcaabd, 0xcc420a6a101d0516},
+ {0xb94470938fa89bce, 0xf808e40e8d5b3e6a},
+ {0x95a8637627989aad, 0xdde7001379a44aa9},
+ {0xf1c90080baf72cb1, 0x5324c68b12dd6339},
+ {0xc350000000000000, 0x0000000000000000},
+ {0x9dc5ada82b70b59d, 0xf020000000000000},
+ {0xfee50b7025c36a08, 0x02f236d04753d5b4},
+ {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86},
+ {0xa6539930bf6bff45, 0x84db8346b786151c},
+ {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2},
+ {0xd910f7ff28069da4, 0x1b2ba1518094da04},
+ {0xaf58416654a6babb, 0x387ac8d1970027b2},
+ {0x8da471a9de737e24, 0x5ceaecfed289e5d2},
+ {0xe4d5e82392a40515, 0x0fabaf3feaa5334a},
+ {0xb8da1662e7b00a17, 0x3d6a751f3b936243},
+ {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}
+#endif
+};
+
+#if !FMT_USE_FULL_CACHE_DRAGONBOX
+template <typename T>
+const uint64_t basic_data<T>::powers_of_5_64[] = {
+ 0x0000000000000001, 0x0000000000000005, 0x0000000000000019,
+ 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35,
+ 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1,
+ 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd,
+ 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9,
+ 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5,
+ 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631,
+ 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed,
+ 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9};
+
+template <typename T>
+const uint32_t basic_data<T>::dragonbox_pow10_recovery_errors[] = {
+ 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001,
+ 0x50000000, 0x00104000, 0x54010004, 0x05004001, 0x55555544, 0x41545555,
+ 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110,
+ 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454,
+ 0x00400541, 0x00000000, 0x55555450, 0x10056551, 0x10054011, 0x55551014,
+ 0x69514555, 0x05151109, 0x00155555};
+#endif
+
+template <typename T>
const char basic_data<T>::foreground_color[] = "\x1b[38;2;";
template <typename T>
const char basic_data<T>::background_color[] = "\x1b[48;2;";
@@ -366,6 +1096,10 @@ class fp {
private:
using significand_type = uint64_t;
+ template <typename Float>
+ using is_supported_float = bool_constant<sizeof(Float) == sizeof(uint64_t) ||
+ sizeof(Float) == sizeof(uint32_t)>;
+
public:
significand_type f;
int e;
@@ -388,63 +1122,38 @@ class fp {
template <typename Double> explicit fp(Double d) { assign(d); }
// Assigns d to this and return true iff predecessor is closer than successor.
- template <typename Double, FMT_ENABLE_IF(sizeof(Double) == sizeof(uint64_t))>
- bool assign(Double d) {
- // Assume double is in the format [sign][exponent][significand].
- using limits = std::numeric_limits<Double>;
+ template <typename Float, FMT_ENABLE_IF(is_supported_float<Float>::value)>
+ bool assign(Float d) {
+ // Assume float is in the format [sign][exponent][significand].
+ using limits = std::numeric_limits<Float>;
+ const int float_significand_size = limits::digits - 1;
const int exponent_size =
- bits<Double>::value - double_significand_size - 1; // -1 for sign
- const uint64_t significand_mask = implicit_bit - 1;
+ bits<Float>::value - float_significand_size - 1; // -1 for sign
+ const uint64_t float_implicit_bit = 1ULL << float_significand_size;
+ const uint64_t significand_mask = float_implicit_bit - 1;
const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask;
const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1;
- auto u = bit_cast<uint64_t>(d);
+ constexpr bool is_double = sizeof(Float) == sizeof(uint64_t);
+ auto u = bit_cast<conditional_t<is_double, uint64_t, uint32_t>>(d);
f = u & significand_mask;
int biased_e =
- static_cast<int>((u & exponent_mask) >> double_significand_size);
+ static_cast<int>((u & exponent_mask) >> float_significand_size);
// Predecessor is closer if d is a normalized power of 2 (f == 0) other than
// the smallest normalized number (biased_e > 1).
bool is_predecessor_closer = f == 0 && biased_e > 1;
if (biased_e != 0)
- f += implicit_bit;
+ f += float_implicit_bit;
else
biased_e = 1; // Subnormals use biased exponent 1 (min exponent).
- e = biased_e - exponent_bias - double_significand_size;
+ e = biased_e - exponent_bias - float_significand_size;
return is_predecessor_closer;
}
- template <typename Double, FMT_ENABLE_IF(sizeof(Double) != sizeof(uint64_t))>
- bool assign(Double) {
+ template <typename Float, FMT_ENABLE_IF(!is_supported_float<Float>::value)>
+ bool assign(Float) {
*this = fp();
return false;
}
-
- // Assigns d to this together with computing lower and upper boundaries,
- // where a boundary is a value half way between the number and its predecessor
- // (lower) or successor (upper). The upper boundary is normalized and lower
- // has the same exponent but may be not normalized.
- template <typename Double> boundaries assign_with_boundaries(Double d) {
- bool is_lower_closer = assign(d);
- fp lower =
- is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1);
- // 1 in normalize accounts for the exponent shift above.
- fp upper = normalize<1>(fp((f << 1) + 1, e - 1));
- lower.f <<= lower.e - upper.e;
- return boundaries{lower.f, upper.f};
- }
-
- template <typename Double> boundaries assign_float_with_boundaries(Double d) {
- assign(d);
- constexpr int min_normal_e = std::numeric_limits<float>::min_exponent -
- std::numeric_limits<double>::digits;
- significand_type half_ulp = 1 << (std::numeric_limits<double>::digits -
- std::numeric_limits<float>::digits - 1);
- if (min_normal_e > e) half_ulp <<= min_normal_e - e;
- fp upper = normalize<0>(fp(f + half_ulp, e));
- fp lower = fp(
- f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e);
- lower.f <<= lower.e - upper.e;
- return boundaries{lower.f, upper.f};
- }
};
// Normalizes the value converted from double and multiplied by (1 << SHIFT).
@@ -488,11 +1197,12 @@ inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; }
// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its
// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`.
inline fp get_cached_power(int min_exponent, int& pow10_exponent) {
- const int64_t one_over_log2_10 = 0x4d104d42; // round(pow(2, 32) / log2(10))
+ const int shift = 32;
+ const auto significand = static_cast<int64_t>(data::log10_2_significand);
int index = static_cast<int>(
- ((min_exponent + fp::significand_size - 1) * one_over_log2_10 +
- ((int64_t(1) << 32) - 1)) // ceil
- >> 32 // arithmetic shift
+ ((min_exponent + fp::significand_size - 1) * (significand >> shift) +
+ ((int64_t(1) << shift) - 1)) // ceil
+ >> 32 // arithmetic shift
);
// Decimal exponent of the first (smallest) cached power of 10.
const int first_dec_exp = -348;
@@ -500,7 +1210,8 @@ inline fp get_cached_power(int min_exponent, int& pow10_exponent) {
const int dec_exp_step = 8;
index = (index - first_dec_exp - 1) / dec_exp_step + 1;
pow10_exponent = first_dec_exp + index * dec_exp_step;
- return {data::pow10_significands[index], data::pow10_exponents[index]};
+ return {data::grisu_pow10_significands[index],
+ data::grisu_pow10_exponents[index]};
}
// A simple accumulator to hold the sums of terms in bigint::square if uint128_t
@@ -559,9 +1270,8 @@ class bigint {
FMT_ASSERT(compare(*this, other) >= 0, "");
bigit borrow = 0;
int i = other.exp_ - exp_;
- for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) {
+ for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j)
subtract_bigits(i, other.bigits_[j], borrow);
- }
while (borrow > 0) subtract_bigits(i, 0, borrow);
remove_leading_zeros();
}
@@ -733,22 +1443,26 @@ class bigint {
exp_ *= 2;
}
+ // If this bigint has a bigger exponent than other, adds trailing zero to make
+ // exponents equal. This simplifies some operations such as subtraction.
+ void align(const bigint& other) {
+ int exp_difference = exp_ - other.exp_;
+ if (exp_difference <= 0) return;
+ int num_bigits = static_cast<int>(bigits_.size());
+ bigits_.resize(to_unsigned(num_bigits + exp_difference));
+ for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j)
+ bigits_[j] = bigits_[i];
+ std::uninitialized_fill_n(bigits_.data(), exp_difference, 0);
+ exp_ -= exp_difference;
+ }
+
// Divides this bignum by divisor, assigning the remainder to this and
// returning the quotient.
int divmod_assign(const bigint& divisor) {
FMT_ASSERT(this != &divisor, "");
if (compare(*this, divisor) < 0) return 0;
- int num_bigits = static_cast<int>(bigits_.size());
FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, "");
- int exp_difference = exp_ - divisor.exp_;
- if (exp_difference > 0) {
- // Align bigints by adding trailing zeros to simplify subtraction.
- bigits_.resize(to_unsigned(num_bigits + exp_difference));
- for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j)
- bigits_[j] = bigits_[i];
- std::uninitialized_fill_n(bigits_.data(), exp_difference, 0);
- exp_ -= exp_difference;
- }
+ align(divisor);
int quotient = 0;
do {
subtract_aligned(divisor);
@@ -788,20 +1502,6 @@ enum result {
};
}
-// A version of count_digits optimized for grisu_gen_digits.
-inline int grisu_count_digits(uint32_t n) {
- if (n < 10) return 1;
- if (n < 100) return 2;
- if (n < 1000) return 3;
- if (n < 10000) return 4;
- if (n < 100000) return 5;
- if (n < 1000000) return 6;
- if (n < 10000000) return 7;
- if (n < 100000000) return 8;
- if (n < 1000000000) return 9;
- return 10;
-}
-
// Generates output using the Grisu digit-gen algorithm.
// error: the size of the region (lower, upper) outside of which numbers
// definitely do not round to value (Delta in Grisu3).
@@ -817,7 +1517,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error,
FMT_ASSERT(integral == value.f >> -one.e, "");
// The fractional part of scaled value (p2 in Grisu) c = value % one.
uint64_t fractional = value.f & (one.f - 1);
- exp = grisu_count_digits(integral); // kappa in Grisu.
+ exp = count_digits(integral); // kappa in Grisu.
// Divide by 10 to prevent overflow.
auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e,
value.f / 10, error * 10, exp);
@@ -867,8 +1567,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error,
FMT_ASSERT(false, "invalid number of digits");
}
--exp;
- uint64_t remainder =
- (static_cast<uint64_t>(integral) << -one.e) + fractional;
+ auto remainder = (static_cast<uint64_t>(integral) << -one.e) + fractional;
result = handler.on_digit(static_cast<char>('0' + digit),
data::powers_of_10_64[exp] << -one.e, remainder,
error, exp, true);
@@ -878,8 +1577,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error,
for (;;) {
fractional *= 10;
error *= 10;
- char digit =
- static_cast<char>('0' + static_cast<char>(fractional >> -one.e));
+ char digit = static_cast<char>('0' + (fractional >> -one.e));
fractional &= one.f - 1;
--exp;
result = handler.on_digit(digit, one.f, fractional, error, exp, false);
@@ -916,6 +1614,7 @@ struct fixed_handler {
uint64_t error, int, bool integral) {
FMT_ASSERT(remainder < divisor, "");
buf[size++] = digit;
+ if (!integral && error >= remainder) return digits::error;
if (size < precision) return digits::more;
if (!integral) {
// Check if error * 2 < divisor with overflow prevention.
@@ -935,59 +1634,684 @@ struct fixed_handler {
}
if (buf[0] > '9') {
buf[0] = '1';
- buf[size++] = '0';
+ if (fixed)
+ buf[size++] = '0';
+ else
+ ++exp10;
}
return digits::done;
}
};
-// The shortest representation digit handler.
-struct grisu_shortest_handler {
- char* buf;
- int size;
- // Distance between scaled value and upper bound (wp_W in Grisu3).
- uint64_t diff;
+// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox.
+namespace dragonbox {
+// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
+FMT_SAFEBUFFERS inline uint128_wrapper umul128(uint64_t x,
+ uint64_t y) FMT_NOEXCEPT {
+#if FMT_USE_INT128
+ return static_cast<uint128_t>(x) * static_cast<uint128_t>(y);
+#elif defined(_MSC_VER) && defined(_M_X64)
+ uint128_wrapper result;
+ result.low_ = _umul128(x, y, &result.high_);
+ return result;
+#else
+ const uint64_t mask = (uint64_t(1) << 32) - uint64_t(1);
+
+ uint64_t a = x >> 32;
+ uint64_t b = x & mask;
+ uint64_t c = y >> 32;
+ uint64_t d = y & mask;
+
+ uint64_t ac = a * c;
+ uint64_t bc = b * c;
+ uint64_t ad = a * d;
+ uint64_t bd = b * d;
+
+ uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask);
+
+ return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32),
+ (intermediate << 32) + (bd & mask)};
+#endif
+}
+
+// Computes upper 64 bits of multiplication of two 64-bit unsigned integers.
+FMT_SAFEBUFFERS inline uint64_t umul128_upper64(uint64_t x,
+ uint64_t y) FMT_NOEXCEPT {
+#if FMT_USE_INT128
+ auto p = static_cast<uint128_t>(x) * static_cast<uint128_t>(y);
+ return static_cast<uint64_t>(p >> 64);
+#elif defined(_MSC_VER) && defined(_M_X64)
+ return __umulh(x, y);
+#else
+ return umul128(x, y).high();
+#endif
+}
+
+// Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a
+// 128-bit unsigned integer.
+FMT_SAFEBUFFERS inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y)
+ FMT_NOEXCEPT {
+ uint128_wrapper g0 = umul128(x, y.high());
+ g0 += umul128_upper64(x, y.low());
+ return g0.high();
+}
+
+// Computes upper 32 bits of multiplication of a 32-bit unsigned integer and a
+// 64-bit unsigned integer.
+inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT {
+ return static_cast<uint32_t>(umul128_upper64(x, y));
+}
+
+// Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a
+// 128-bit unsigned integer.
+FMT_SAFEBUFFERS inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y)
+ FMT_NOEXCEPT {
+ uint64_t g01 = x * y.high();
+ uint64_t g10 = umul128_upper64(x, y.low());
+ return g01 + g10;
+}
+
+// Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a
+// 64-bit unsigned integer.
+inline uint64_t umul96_lower64(uint32_t x, uint64_t y) FMT_NOEXCEPT {
+ return x * y;
+}
+
+// Computes floor(log10(pow(2, e))) for e in [-1700, 1700] using the method from
+// https://fmt.dev/papers/Grisu-Exact.pdf#page=5, section 3.4.
+inline int floor_log10_pow2(int e) FMT_NOEXCEPT {
+ FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent");
+ const int shift = 22;
+ return (e * static_cast<int>(data::log10_2_significand >> (64 - shift))) >>
+ shift;
+}
+
+// Various fast log computations.
+inline int floor_log2_pow10(int e) FMT_NOEXCEPT {
+ FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent");
+ const uint64_t log2_10_integer_part = 3;
+ const uint64_t log2_10_fractional_digits = 0x5269e12f346e2bf9;
+ const int shift_amount = 19;
+ return (e * static_cast<int>(
+ (log2_10_integer_part << shift_amount) |
+ (log2_10_fractional_digits >> (64 - shift_amount)))) >>
+ shift_amount;
+}
+inline int floor_log10_pow2_minus_log10_4_over_3(int e) FMT_NOEXCEPT {
+ FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent");
+ const uint64_t log10_4_over_3_fractional_digits = 0x1ffbfc2bbc780375;
+ const int shift_amount = 22;
+ return (e * static_cast<int>(data::log10_2_significand >>
+ (64 - shift_amount)) -
+ static_cast<int>(log10_4_over_3_fractional_digits >>
+ (64 - shift_amount))) >>
+ shift_amount;
+}
+
+// Returns true iff x is divisible by pow(2, exp).
+inline bool divisible_by_power_of_2(uint32_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp >= 1, "");
+ FMT_ASSERT(x != 0, "");
+#ifdef FMT_BUILTIN_CTZ
+ return FMT_BUILTIN_CTZ(x) >= exp;
+#else
+ return exp < num_bits<uint32_t>() && x == ((x >> exp) << exp);
+#endif
+}
+inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp >= 1, "");
+ FMT_ASSERT(x != 0, "");
+#ifdef FMT_BUILTIN_CTZLL
+ return FMT_BUILTIN_CTZLL(x) >= exp;
+#else
+ return exp < num_bits<uint64_t>() && x == ((x >> exp) << exp);
+#endif
+}
+
+// Returns true iff x is divisible by pow(5, exp).
+inline bool divisible_by_power_of_5(uint32_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp <= 10, "too large exponent");
+ return x * data::divtest_table_for_pow5_32[exp].mod_inv <=
+ data::divtest_table_for_pow5_32[exp].max_quotient;
+}
+inline bool divisible_by_power_of_5(uint64_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp <= 23, "too large exponent");
+ return x * data::divtest_table_for_pow5_64[exp].mod_inv <=
+ data::divtest_table_for_pow5_64[exp].max_quotient;
+}
+
+// Replaces n by floor(n / pow(5, N)) returning true if and only if n is
+// divisible by pow(5, N).
+// Precondition: n <= 2 * pow(5, N + 1).
+template <int N>
+bool check_divisibility_and_divide_by_pow5(uint32_t& n) FMT_NOEXCEPT {
+ static constexpr struct {
+ uint32_t magic_number;
+ int bits_for_comparison;
+ uint32_t threshold;
+ int shift_amount;
+ } infos[] = {{0xcccd, 16, 0x3333, 18}, {0xa429, 8, 0x0a, 20}};
+ constexpr auto info = infos[N - 1];
+ n *= info.magic_number;
+ const uint32_t comparison_mask = (1u << info.bits_for_comparison) - 1;
+ bool result = (n & comparison_mask) <= info.threshold;
+ n >>= info.shift_amount;
+ return result;
+}
+
+// Computes floor(n / pow(10, N)) for small n and N.
+// Precondition: n <= pow(10, N + 1).
+template <int N> uint32_t small_division_by_pow10(uint32_t n) FMT_NOEXCEPT {
+ static constexpr struct {
+ uint32_t magic_number;
+ int shift_amount;
+ uint32_t divisor_times_10;
+ } infos[] = {{0xcccd, 19, 100}, {0xa3d8, 22, 1000}};
+ constexpr auto info = infos[N - 1];
+ FMT_ASSERT(n <= info.divisor_times_10, "n is too large");
+ return n * info.magic_number >> info.shift_amount;
+}
+
+// Computes floor(n / 10^(kappa + 1)) (float)
+inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) FMT_NOEXCEPT {
+ return n / float_info<float>::big_divisor;
+}
+// Computes floor(n / 10^(kappa + 1)) (double)
+inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) FMT_NOEXCEPT {
+ return umul128_upper64(n, 0x83126e978d4fdf3c) >> 9;
+}
+
+// Various subroutines using pow10 cache
+template <class T> struct cache_accessor;
+
+template <> struct cache_accessor<float> {
+ using carrier_uint = float_info<float>::carrier_uint;
+ using cache_entry_type = uint64_t;
+
+ static uint64_t get_cached_power(int k) FMT_NOEXCEPT {
+ FMT_ASSERT(k >= float_info<float>::min_k && k <= float_info<float>::max_k,
+ "k is out of range");
+ return data::dragonbox_pow10_significands_64[k - float_info<float>::min_k];
+ }
+
+ static carrier_uint compute_mul(carrier_uint u,
+ const cache_entry_type& cache) FMT_NOEXCEPT {
+ return umul96_upper32(u, cache);
+ }
+
+ static uint32_t compute_delta(const cache_entry_type& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<uint32_t>(cache >> (64 - 1 - beta_minus_1));
+ }
+
+ static bool compute_mul_parity(carrier_uint two_f,
+ const cache_entry_type& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ FMT_ASSERT(beta_minus_1 >= 1, "");
+ FMT_ASSERT(beta_minus_1 < 64, "");
+
+ return ((umul96_lower64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0;
+ }
+
+ static carrier_uint compute_left_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<carrier_uint>(
+ (cache - (cache >> (float_info<float>::significand_bits + 2))) >>
+ (64 - float_info<float>::significand_bits - 1 - beta_minus_1));
+ }
+
+ static carrier_uint compute_right_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<carrier_uint>(
+ (cache + (cache >> (float_info<float>::significand_bits + 1))) >>
+ (64 - float_info<float>::significand_bits - 1 - beta_minus_1));
+ }
+
+ static carrier_uint compute_round_up_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return (static_cast<carrier_uint>(
+ cache >>
+ (64 - float_info<float>::significand_bits - 2 - beta_minus_1)) +
+ 1) /
+ 2;
+ }
+};
+
+template <> struct cache_accessor<double> {
+ using carrier_uint = float_info<double>::carrier_uint;
+ using cache_entry_type = uint128_wrapper;
+
+ static uint128_wrapper get_cached_power(int k) FMT_NOEXCEPT {
+ FMT_ASSERT(k >= float_info<double>::min_k && k <= float_info<double>::max_k,
+ "k is out of range");
+
+#if FMT_USE_FULL_CACHE_DRAGONBOX
+ return data::dragonbox_pow10_significands_128[k -
+ float_info<double>::min_k];
+#else
+ static const int compression_ratio = 27;
+
+ // Compute base index.
+ int cache_index = (k - float_info<double>::min_k) / compression_ratio;
+ int kb = cache_index * compression_ratio + float_info<double>::min_k;
+ int offset = k - kb;
+
+ // Get base cache.
+ uint128_wrapper base_cache =
+ data::dragonbox_pow10_significands_128[cache_index];
+ if (offset == 0) return base_cache;
+
+ // Compute the required amount of bit-shift.
+ int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset;
+ FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected");
+
+ // Try to recover the real cache.
+ uint64_t pow5 = data::powers_of_5_64[offset];
+ uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5);
+ uint128_wrapper middle_low =
+ umul128(base_cache.low() - (kb < 0 ? 1u : 0u), pow5);
+
+ recovered_cache += middle_low.high();
+
+ uint64_t high_to_middle = recovered_cache.high() << (64 - alpha);
+ uint64_t middle_to_low = recovered_cache.low() << (64 - alpha);
+
+ recovered_cache =
+ uint128_wrapper{(recovered_cache.low() >> alpha) | high_to_middle,
+ ((middle_low.low() >> alpha) | middle_to_low)};
+
+ if (kb < 0) recovered_cache += 1;
+
+ // Get error.
+ int error_idx = (k - float_info<double>::min_k) / 16;
+ uint32_t error = (data::dragonbox_pow10_recovery_errors[error_idx] >>
+ ((k - float_info<double>::min_k) % 16) * 2) &
+ 0x3;
+
+ // Add the error back.
+ FMT_ASSERT(recovered_cache.low() + error >= recovered_cache.low(), "");
+ return {recovered_cache.high(), recovered_cache.low() + error};
+#endif
+ }
+
+ static carrier_uint compute_mul(carrier_uint u,
+ const cache_entry_type& cache) FMT_NOEXCEPT {
+ return umul192_upper64(u, cache);
+ }
+
+ static uint32_t compute_delta(cache_entry_type const& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta_minus_1));
+ }
+
+ static bool compute_mul_parity(carrier_uint two_f,
+ const cache_entry_type& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ FMT_ASSERT(beta_minus_1 >= 1, "");
+ FMT_ASSERT(beta_minus_1 < 64, "");
- digits::result on_start(uint64_t, uint64_t, uint64_t, int&) {
- return digits::more;
+ return ((umul192_middle64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0;
}
- // Decrement the generated number approaching value from above.
- void round(uint64_t d, uint64_t divisor, uint64_t& remainder,
- uint64_t error) {
- while (
- remainder < d && error - remainder >= divisor &&
- (remainder + divisor < d || d - remainder >= remainder + divisor - d)) {
- --buf[size - 1];
- remainder += divisor;
+ static carrier_uint compute_left_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return (cache.high() -
+ (cache.high() >> (float_info<double>::significand_bits + 2))) >>
+ (64 - float_info<double>::significand_bits - 1 - beta_minus_1);
+ }
+
+ static carrier_uint compute_right_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return (cache.high() +
+ (cache.high() >> (float_info<double>::significand_bits + 1))) >>
+ (64 - float_info<double>::significand_bits - 1 - beta_minus_1);
+ }
+
+ static carrier_uint compute_round_up_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return ((cache.high() >>
+ (64 - float_info<double>::significand_bits - 2 - beta_minus_1)) +
+ 1) /
+ 2;
+ }
+};
+
+// Various integer checks
+template <class T>
+bool is_left_endpoint_integer_shorter_interval(int exponent) FMT_NOEXCEPT {
+ return exponent >=
+ float_info<
+ T>::case_shorter_interval_left_endpoint_lower_threshold &&
+ exponent <=
+ float_info<T>::case_shorter_interval_left_endpoint_upper_threshold;
+}
+template <class T>
+bool is_endpoint_integer(typename float_info<T>::carrier_uint two_f,
+ int exponent, int minus_k) FMT_NOEXCEPT {
+ if (exponent < float_info<T>::case_fc_pm_half_lower_threshold) return false;
+ // For k >= 0.
+ if (exponent <= float_info<T>::case_fc_pm_half_upper_threshold) return true;
+ // For k < 0.
+ if (exponent > float_info<T>::divisibility_check_by_5_threshold) return false;
+ return divisible_by_power_of_5(two_f, minus_k);
+}
+
+template <class T>
+bool is_center_integer(typename float_info<T>::carrier_uint two_f, int exponent,
+ int minus_k) FMT_NOEXCEPT {
+ // Exponent for 5 is negative.
+ if (exponent > float_info<T>::divisibility_check_by_5_threshold) return false;
+ if (exponent > float_info<T>::case_fc_upper_threshold)
+ return divisible_by_power_of_5(two_f, minus_k);
+ // Both exponents are nonnegative.
+ if (exponent >= float_info<T>::case_fc_lower_threshold) return true;
+ // Exponent for 2 is negative.
+ return divisible_by_power_of_2(two_f, minus_k - exponent + 1);
+}
+
+// Remove trailing zeros from n and return the number of zeros removed (float)
+FMT_ALWAYS_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT {
+#ifdef FMT_BUILTIN_CTZ
+ int t = FMT_BUILTIN_CTZ(n);
+#else
+ int t = ctz(n);
+#endif
+ if (t > float_info<float>::max_trailing_zeros)
+ t = float_info<float>::max_trailing_zeros;
+
+ const uint32_t mod_inv1 = 0xcccccccd;
+ const uint32_t max_quotient1 = 0x33333333;
+ const uint32_t mod_inv2 = 0xc28f5c29;
+ const uint32_t max_quotient2 = 0x0a3d70a3;
+
+ int s = 0;
+ for (; s < t - 1; s += 2) {
+ if (n * mod_inv2 > max_quotient2) break;
+ n *= mod_inv2;
+ }
+ if (s < t && n * mod_inv1 <= max_quotient1) {
+ n *= mod_inv1;
+ ++s;
+ }
+ n >>= s;
+ return s;
+}
+
+// Removes trailing zeros and returns the number of zeros removed (double)
+FMT_ALWAYS_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT {
+#ifdef FMT_BUILTIN_CTZLL
+ int t = FMT_BUILTIN_CTZLL(n);
+#else
+ int t = ctzll(n);
+#endif
+ if (t > float_info<double>::max_trailing_zeros)
+ t = float_info<double>::max_trailing_zeros;
+ // Divide by 10^8 and reduce to 32-bits
+ // Since ret_value.significand <= (2^64 - 1) / 1000 < 10^17,
+ // both of the quotient and the r should fit in 32-bits
+
+ const uint32_t mod_inv1 = 0xcccccccd;
+ const uint32_t max_quotient1 = 0x33333333;
+ const uint64_t mod_inv8 = 0xc767074b22e90e21;
+ const uint64_t max_quotient8 = 0x00002af31dc46118;
+
+ // If the number is divisible by 1'0000'0000, work with the quotient
+ if (t >= 8) {
+ auto quotient_candidate = n * mod_inv8;
+
+ if (quotient_candidate <= max_quotient8) {
+ auto quotient = static_cast<uint32_t>(quotient_candidate >> 8);
+
+ int s = 8;
+ for (; s < t; ++s) {
+ if (quotient * mod_inv1 > max_quotient1) break;
+ quotient *= mod_inv1;
+ }
+ quotient >>= (s - 8);
+ n = quotient;
+ return s;
}
}
- // Implements Grisu's round_weed.
- digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder,
- uint64_t error, int exp, bool integral) {
- buf[size++] = digit;
- if (remainder >= error) return digits::more;
- uint64_t unit = integral ? 1 : data::powers_of_10_64[-exp];
- uint64_t up = (diff - 1) * unit; // wp_Wup
- round(up, divisor, remainder, error);
- uint64_t down = (diff + 1) * unit; // wp_Wdown
- if (remainder < down && error - remainder >= divisor &&
- (remainder + divisor < down ||
- down - remainder > remainder + divisor - down)) {
- return digits::error;
+ // Otherwise, work with the remainder
+ auto quotient = static_cast<uint32_t>(n / 100000000);
+ auto remainder = static_cast<uint32_t>(n - 100000000 * quotient);
+
+ if (t == 0 || remainder * mod_inv1 > max_quotient1) {
+ return 0;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 1 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 1) + quotient * 10000000ull;
+ return 1;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 2 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 2) + quotient * 1000000ull;
+ return 2;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 3 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 3) + quotient * 100000ull;
+ return 3;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 4 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 4) + quotient * 10000ull;
+ return 4;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 5 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 5) + quotient * 1000ull;
+ return 5;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 6 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 6) + quotient * 100ull;
+ return 6;
+ }
+ remainder *= mod_inv1;
+
+ n = (remainder >> 7) + quotient * 10ull;
+ return 7;
+}
+
+// The main algorithm for shorter interval case
+template <class T>
+FMT_ALWAYS_INLINE FMT_SAFEBUFFERS decimal_fp<T> shorter_interval_case(
+ int exponent) FMT_NOEXCEPT {
+ decimal_fp<T> ret_value;
+ // Compute k and beta
+ const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent);
+ const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k);
+
+ // Compute xi and zi
+ using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
+ const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
+
+ auto xi = cache_accessor<T>::compute_left_endpoint_for_shorter_interval_case(
+ cache, beta_minus_1);
+ auto zi = cache_accessor<T>::compute_right_endpoint_for_shorter_interval_case(
+ cache, beta_minus_1);
+
+ // If the left endpoint is not an integer, increase it
+ if (!is_left_endpoint_integer_shorter_interval<T>(exponent)) ++xi;
+
+ // Try bigger divisor
+ ret_value.significand = zi / 10;
+
+ // If succeed, remove trailing zeros if necessary and return
+ if (ret_value.significand * 10 >= xi) {
+ ret_value.exponent = minus_k + 1;
+ ret_value.exponent += remove_trailing_zeros(ret_value.significand);
+ return ret_value;
+ }
+
+ // Otherwise, compute the round-up of y
+ ret_value.significand =
+ cache_accessor<T>::compute_round_up_for_shorter_interval_case(
+ cache, beta_minus_1);
+ ret_value.exponent = minus_k;
+
+ // When tie occurs, choose one of them according to the rule
+ if (exponent >= float_info<T>::shorter_interval_tie_lower_threshold &&
+ exponent <= float_info<T>::shorter_interval_tie_upper_threshold) {
+ ret_value.significand = ret_value.significand % 2 == 0
+ ? ret_value.significand
+ : ret_value.significand - 1;
+ } else if (ret_value.significand < xi) {
+ ++ret_value.significand;
+ }
+ return ret_value;
+}
+
+template <typename T>
+FMT_SAFEBUFFERS decimal_fp<T> to_decimal(T x) FMT_NOEXCEPT {
+ // Step 1: integer promotion & Schubfach multiplier calculation.
+
+ using carrier_uint = typename float_info<T>::carrier_uint;
+ using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
+ auto br = bit_cast<carrier_uint>(x);
+
+ // Extract significand bits and exponent bits.
+ const carrier_uint significand_mask =
+ (static_cast<carrier_uint>(1) << float_info<T>::significand_bits) - 1;
+ carrier_uint significand = (br & significand_mask);
+ int exponent = static_cast<int>((br & exponent_mask<T>()) >>
+ float_info<T>::significand_bits);
+
+ if (exponent != 0) { // Check if normal.
+ exponent += float_info<T>::exponent_bias - float_info<T>::significand_bits;
+
+ // Shorter interval case; proceed like Schubfach.
+ if (significand == 0) return shorter_interval_case<T>(exponent);
+
+ significand |=
+ (static_cast<carrier_uint>(1) << float_info<T>::significand_bits);
+ } else {
+ // Subnormal case; the interval is always regular.
+ if (significand == 0) return {0, 0};
+ exponent = float_info<T>::min_exponent - float_info<T>::significand_bits;
+ }
+
+ const bool include_left_endpoint = (significand % 2 == 0);
+ const bool include_right_endpoint = include_left_endpoint;
+
+ // Compute k and beta.
+ const int minus_k = floor_log10_pow2(exponent) - float_info<T>::kappa;
+ const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
+ const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k);
+
+ // Compute zi and deltai
+ // 10^kappa <= deltai < 10^(kappa + 1)
+ const uint32_t deltai = cache_accessor<T>::compute_delta(cache, beta_minus_1);
+ const carrier_uint two_fc = significand << 1;
+ const carrier_uint two_fr = two_fc | 1;
+ const carrier_uint zi =
+ cache_accessor<T>::compute_mul(two_fr << beta_minus_1, cache);
+
+ // Step 2: Try larger divisor; remove trailing zeros if necessary
+
+ // Using an upper bound on zi, we might be able to optimize the division
+ // better than the compiler; we are computing zi / big_divisor here
+ decimal_fp<T> ret_value;
+ ret_value.significand = divide_by_10_to_kappa_plus_1(zi);
+ uint32_t r = static_cast<uint32_t>(zi - float_info<T>::big_divisor *
+ ret_value.significand);
+
+ if (r > deltai) {
+ goto small_divisor_case_label;
+ } else if (r < deltai) {
+ // Exclude the right endpoint if necessary
+ if (r == 0 && !include_right_endpoint &&
+ is_endpoint_integer<T>(two_fr, exponent, minus_k)) {
+ --ret_value.significand;
+ r = float_info<T>::big_divisor;
+ goto small_divisor_case_label;
+ }
+ } else {
+ // r == deltai; compare fractional parts
+ // Check conditions in the order different from the paper
+ // to take advantage of short-circuiting
+ const carrier_uint two_fl = two_fc - 1;
+ if ((!include_left_endpoint ||
+ !is_endpoint_integer<T>(two_fl, exponent, minus_k)) &&
+ !cache_accessor<T>::compute_mul_parity(two_fl, cache, beta_minus_1)) {
+ goto small_divisor_case_label;
}
- return 2 * unit <= remainder && remainder <= error - 4 * unit
- ? digits::done
- : digits::error;
}
-};
+ ret_value.exponent = minus_k + float_info<T>::kappa + 1;
+
+ // We may need to remove trailing zeros
+ ret_value.exponent += remove_trailing_zeros(ret_value.significand);
+ return ret_value;
+
+ // Step 3: Find the significand with the smaller divisor
+
+small_divisor_case_label:
+ ret_value.significand *= 10;
+ ret_value.exponent = minus_k + float_info<T>::kappa;
+
+ const uint32_t mask = (1u << float_info<T>::kappa) - 1;
+ auto dist = r - (deltai / 2) + (float_info<T>::small_divisor / 2);
+
+ // Is dist divisible by 2^kappa?
+ if ((dist & mask) == 0) {
+ const bool approx_y_parity =
+ ((dist ^ (float_info<T>::small_divisor / 2)) & 1) != 0;
+ dist >>= float_info<T>::kappa;
+
+ // Is dist divisible by 5^kappa?
+ if (check_divisibility_and_divide_by_pow5<float_info<T>::kappa>(dist)) {
+ ret_value.significand += dist;
+
+ // Check z^(f) >= epsilon^(f)
+ // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1,
+ // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f)
+ // Since there are only 2 possibilities, we only need to care about the
+ // parity. Also, zi and r should have the same parity since the divisor
+ // is an even number
+ if (cache_accessor<T>::compute_mul_parity(two_fc, cache, beta_minus_1) !=
+ approx_y_parity) {
+ --ret_value.significand;
+ } else {
+ // If z^(f) >= epsilon^(f), we might have a tie
+ // when z^(f) == epsilon^(f), or equivalently, when y is an integer
+ if (is_center_integer<T>(two_fc, exponent, minus_k)) {
+ ret_value.significand = ret_value.significand % 2 == 0
+ ? ret_value.significand
+ : ret_value.significand - 1;
+ }
+ }
+ }
+ // Is dist not divisible by 5^kappa?
+ else {
+ ret_value.significand += dist;
+ }
+ }
+ // Is dist not divisible by 2^kappa?
+ else {
+ // Since we know dist is small, we might be able to optimize the division
+ // better than the compiler; we are computing dist / small_divisor here
+ ret_value.significand +=
+ small_division_by_pow10<float_info<T>::kappa>(dist);
+ }
+ return ret_value;
+}
+} // namespace dragonbox
// Formats value using a variation of the Fixed-Precision Positive
// Floating-Point Printout ((FPP)^2) algorithm by Steele & White:
// https://fmt.dev/p372-steele.pdf.
template <typename Double>
-void fallback_format(Double d, buffer<char>& buf, int& exp10) {
+void fallback_format(Double d, int num_digits, bool binary32, buffer<char>& buf,
+ int& exp10) {
bigint numerator; // 2 * R in (FPP)^2.
bigint denominator; // 2 * S in (FPP)^2.
// lower and upper are differences between value and corresponding boundaries.
@@ -998,8 +2322,9 @@ void fallback_format(Double d, buffer<char>& buf, int& exp10) {
// Shift numerator and denominator by an extra bit or two (if lower boundary
// is closer) to make lower and upper integers. This eliminates multiplication
// by 2 during later computations.
- // TODO: handle float
- int shift = value.assign(d) ? 2 : 1;
+ const bool is_predecessor_closer =
+ binary32 ? value.assign(static_cast<float>(d)) : value.assign(d);
+ int shift = is_predecessor_closer ? 2 : 1;
uint64_t significand = value.f << shift;
if (value.e >= 0) {
numerator.assign(significand);
@@ -1012,7 +2337,7 @@ void fallback_format(Double d, buffer<char>& buf, int& exp10) {
upper = &upper_store;
}
denominator.assign_pow10(exp10);
- denominator <<= 1;
+ denominator <<= shift;
} else if (exp10 < 0) {
numerator.assign_pow10(-exp10);
lower.assign(numerator);
@@ -1034,39 +2359,73 @@ void fallback_format(Double d, buffer<char>& buf, int& exp10) {
upper = &upper_store;
}
}
- if (!upper) upper = &lower;
// Invariant: value == (numerator / denominator) * pow(10, exp10).
- bool even = (value.f & 1) == 0;
- int num_digits = 0;
- char* data = buf.data();
- for (;;) {
- int digit = numerator.divmod_assign(denominator);
- bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower.
- // numerator + upper >[=] pow10:
- bool high = add_compare(numerator, *upper, denominator) + even > 0;
- data[num_digits++] = static_cast<char>('0' + digit);
- if (low || high) {
- if (!low) {
- ++data[num_digits - 1];
- } else if (high) {
- int result = add_compare(numerator, numerator, denominator);
- // Round half to even.
- if (result > 0 || (result == 0 && (digit % 2) != 0))
+ if (num_digits < 0) {
+ // Generate the shortest representation.
+ if (!upper) upper = &lower;
+ bool even = (value.f & 1) == 0;
+ num_digits = 0;
+ char* data = buf.data();
+ for (;;) {
+ int digit = numerator.divmod_assign(denominator);
+ bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower.
+ // numerator + upper >[=] pow10:
+ bool high = add_compare(numerator, *upper, denominator) + even > 0;
+ data[num_digits++] = static_cast<char>('0' + digit);
+ if (low || high) {
+ if (!low) {
++data[num_digits - 1];
+ } else if (high) {
+ int result = add_compare(numerator, numerator, denominator);
+ // Round half to even.
+ if (result > 0 || (result == 0 && (digit % 2) != 0))
+ ++data[num_digits - 1];
+ }
+ buf.try_resize(to_unsigned(num_digits));
+ exp10 -= num_digits - 1;
+ return;
}
- buf.resize(to_unsigned(num_digits));
- exp10 -= num_digits - 1;
- return;
+ numerator *= 10;
+ lower *= 10;
+ if (upper != &lower) *upper *= 10;
}
+ }
+ // Generate the given number of digits.
+ exp10 -= num_digits - 1;
+ if (num_digits == 0) {
+ buf.try_resize(1);
+ denominator *= 10;
+ buf[0] = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0';
+ return;
+ }
+ buf.try_resize(to_unsigned(num_digits));
+ for (int i = 0; i < num_digits - 1; ++i) {
+ int digit = numerator.divmod_assign(denominator);
+ buf[i] = static_cast<char>('0' + digit);
numerator *= 10;
- lower *= 10;
- if (upper != &lower) *upper *= 10;
}
+ int digit = numerator.divmod_assign(denominator);
+ auto result = add_compare(numerator, numerator, denominator);
+ if (result > 0 || (result == 0 && (digit % 2) != 0)) {
+ if (digit == 9) {
+ const auto overflow = '0' + 10;
+ buf[num_digits - 1] = overflow;
+ // Propagate the carry.
+ for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) {
+ buf[i] = '0';
+ ++buf[i - 1];
+ }
+ if (buf[0] == overflow) {
+ buf[0] = '1';
+ ++exp10;
+ }
+ return;
+ }
+ ++digit;
+ }
+ buf[num_digits - 1] = static_cast<char>('0' + digit);
}
-// Formats value using the Grisu algorithm
-// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf)
-// if T is a IEEE754 binary32 or binary64 and snprintf otherwise.
template <typename T>
int format_float(T value, int precision, float_specs specs, buffer<char>& buf) {
static_assert(!std::is_same<T, float>::value, "");
@@ -1078,66 +2437,57 @@ int format_float(T value, int precision, float_specs specs, buffer<char>& buf) {
buf.push_back('0');
return 0;
}
- buf.resize(to_unsigned(precision));
+ buf.try_resize(to_unsigned(precision));
std::uninitialized_fill_n(buf.data(), precision, '0');
return -precision;
}
if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf);
+ if (precision < 0) {
+ // Use Dragonbox for the shortest format.
+ if (specs.binary32) {
+ auto dec = dragonbox::to_decimal(static_cast<float>(value));
+ write<char>(buffer_appender<char>(buf), dec.significand);
+ return dec.exponent;
+ }
+ auto dec = dragonbox::to_decimal(static_cast<double>(value));
+ write<char>(buffer_appender<char>(buf), dec.significand);
+ return dec.exponent;
+ }
+
+ // Use Grisu + Dragon4 for the given precision:
+ // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
int exp = 0;
const int min_exp = -60; // alpha in Grisu.
int cached_exp10 = 0; // K in Grisu.
- if (precision < 0) {
- fp fp_value;
- auto boundaries = specs.binary32
- ? fp_value.assign_float_with_boundaries(value)
- : fp_value.assign_with_boundaries(value);
- fp_value = normalize(fp_value);
- // Find a cached power of 10 such that multiplying value by it will bring
- // the exponent in the range [min_exp, -32].
- const fp cached_pow = get_cached_power(
- min_exp - (fp_value.e + fp::significand_size), cached_exp10);
- // Multiply value and boundaries by the cached power of 10.
- fp_value = fp_value * cached_pow;
- boundaries.lower = multiply(boundaries.lower, cached_pow.f);
- boundaries.upper = multiply(boundaries.upper, cached_pow.f);
- assert(min_exp <= fp_value.e && fp_value.e <= -32);
- --boundaries.lower; // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}.
- ++boundaries.upper; // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}.
- // Numbers outside of (lower, upper) definitely do not round to value.
- grisu_shortest_handler handler{buf.data(), 0,
- boundaries.upper - fp_value.f};
- auto result =
- grisu_gen_digits(fp(boundaries.upper, fp_value.e),
- boundaries.upper - boundaries.lower, exp, handler);
- if (result == digits::error) {
- exp += handler.size - cached_exp10 - 1;
- fallback_format(value, buf, exp);
- return exp;
- }
- buf.resize(to_unsigned(handler.size));
+ fp normalized = normalize(fp(value));
+ const auto cached_pow = get_cached_power(
+ min_exp - (normalized.e + fp::significand_size), cached_exp10);
+ normalized = normalized * cached_pow;
+ // Limit precision to the maximum possible number of significant digits in an
+ // IEEE754 double because we don't need to generate zeros.
+ const int max_double_digits = 767;
+ if (precision > max_double_digits) precision = max_double_digits;
+ fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
+ if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) {
+ exp += handler.size - cached_exp10 - 1;
+ fallback_format(value, handler.precision, specs.binary32, buf, exp);
} else {
- if (precision > 17) return snprintf_float(value, precision, specs, buf);
- fp normalized = normalize(fp(value));
- const auto cached_pow = get_cached_power(
- min_exp - (normalized.e + fp::significand_size), cached_exp10);
- normalized = normalized * cached_pow;
- fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
- if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error)
- return snprintf_float(value, precision, specs, buf);
- int num_digits = handler.size;
- if (!fixed) {
- // Remove trailing zeros.
- while (num_digits > 0 && buf[num_digits - 1] == '0') {
- --num_digits;
- ++exp;
- }
+ exp += handler.exp10;
+ buf.try_resize(to_unsigned(handler.size));
+ }
+ if (!fixed && !specs.showpoint) {
+ // Remove trailing zeros.
+ auto num_digits = buf.size();
+ while (num_digits > 0 && buf[num_digits - 1] == '0') {
+ --num_digits;
+ ++exp;
}
- buf.resize(to_unsigned(num_digits));
+ buf.try_resize(num_digits);
}
- return exp - cached_exp10;
-}
+ return exp;
+} // namespace detail
template <typename T>
int snprintf_float(T value, int precision, float_specs specs,
@@ -1185,19 +2535,20 @@ int snprintf_float(T value, int precision, float_specs specs,
? snprintf_ptr(begin, capacity, format, precision, value)
: snprintf_ptr(begin, capacity, format, value);
if (result < 0) {
- buf.reserve(buf.capacity() + 1); // The buffer will grow exponentially.
+ // The buffer will grow exponentially.
+ buf.try_reserve(buf.capacity() + 1);
continue;
}
auto size = to_unsigned(result);
// Size equal to capacity means that the last character was truncated.
if (size >= capacity) {
- buf.reserve(size + offset + 1); // Add 1 for the terminating '\0'.
+ buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'.
continue;
}
auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
if (specs.format == float_format::fixed) {
if (precision == 0) {
- buf.resize(size);
+ buf.try_resize(size);
return 0;
}
// Find and remove the decimal point.
@@ -1207,11 +2558,11 @@ int snprintf_float(T value, int precision, float_specs specs,
} while (is_digit(*p));
int fraction_size = static_cast<int>(end - p - 1);
std::memmove(p, p + 1, to_unsigned(fraction_size));
- buf.resize(size - 1);
+ buf.try_resize(size - 1);
return -fraction_size;
}
if (specs.format == float_format::hex) {
- buf.resize(size + offset);
+ buf.try_resize(size + offset);
return 0;
}
// Find and parse the exponent.
@@ -1237,7 +2588,7 @@ int snprintf_float(T value, int precision, float_specs specs,
fraction_size = static_cast<int>(fraction_end - begin - 1);
std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size));
}
- buf.resize(to_unsigned(fraction_size) + offset + 1);
+ buf.try_resize(to_unsigned(fraction_size) + offset + 1);
return exp - fraction_size;
}
}
@@ -1259,25 +2610,18 @@ int snprintf_float(T value, int precision, float_specs specs,
* occurs, this pointer will be a guess that depends on the particular
* error, but it will always advance at least one byte.
*/
-FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
- static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
static const int shiftc[] = {0, 18, 12, 6, 0};
static const int shifte[] = {0, 6, 4, 2, 0};
- auto s = reinterpret_cast<const unsigned char*>(buf);
- int len = lengths[s[0] >> 3];
-
- // Compute the pointer to the next character early so that the next
- // iteration can start working on the next character. Neither Clang
- // nor GCC figure out this reordering on their own.
- const char* next = buf + len + !len;
+ int len = code_point_length(buf);
+ const char* next = buf + len;
// Assume a four-byte character and load four bytes. Unused bits are
// shifted out.
+ auto s = reinterpret_cast<const unsigned char*>(buf);
*c = uint32_t(s[0] & masks[len]) << 18;
*c |= uint32_t(s[1] & 0x3f) << 12;
*c |= uint32_t(s[2] & 0x3f) << 6;
@@ -1296,6 +2640,19 @@ FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
return next;
}
+
+struct stringifier {
+ template <typename T> FMT_INLINE std::string operator()(T value) const {
+ return to_string(value);
+ }
+ std::string operator()(basic_format_arg<format_context>::handle h) const {
+ memory_buffer buf;
+ format_parse_context parse_ctx({});
+ format_context format_ctx(buffer_appender<char>(buf), {}, {});
+ h.format(parse_ctx, format_ctx);
+ return to_string(buf);
+ }
+};
} // namespace detail
template <> struct formatter<detail::bigint> {
@@ -1363,7 +2720,8 @@ FMT_FUNC void format_system_error(detail::buffer<char>& out, int error_code,
int result =
detail::safe_strerror(error_code, system_message, buf.size());
if (result == 0) {
- format_to(std::back_inserter(out), "{}: {}", message, system_message);
+ format_to(detail::buffer_appender<char>(out), "{}: {}", message,
+ system_message);
return;
}
if (result != ERANGE)
@@ -1384,20 +2742,6 @@ FMT_FUNC void report_system_error(int error_code,
report_error(format_system_error, error_code, message);
}
-struct stringifier {
- template <typename T> FMT_INLINE std::string operator()(T value) const {
- return to_string(value);
- }
- std::string operator()(basic_format_arg<format_context>::handle h) const {
- memory_buffer buf;
- detail::buffer<char>& base = buf;
- format_parse_context parse_ctx({});
- format_context format_ctx(std::back_inserter(base), {}, {});
- h.format(parse_ctx, format_ctx);
- return to_string(buf);
- }
-};
-
FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) {
if (format_str.size() == 2 && equal2(format_str.data(), "{}")) {
auto arg = args.get(0);
@@ -1409,6 +2753,14 @@ FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) {
return to_string(buffer);
}
+#ifdef _WIN32
+namespace detail {
+using dword = conditional_t<sizeof(long) == 4, unsigned long, unsigned>;
+extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( //
+ void*, const void*, dword, dword*, void*);
+} // namespace detail
+#endif
+
FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) {
memory_buffer buffer;
detail::vformat_to(buffer, format_str,
@@ -1417,10 +2769,10 @@ FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) {
auto fd = _fileno(f);
if (_isatty(fd)) {
detail::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size()));
- auto written = DWORD();
- if (!WriteConsoleW(reinterpret_cast<HANDLE>(_get_osfhandle(fd)),
- u16.c_str(), static_cast<DWORD>(u16.size()), &written,
- nullptr)) {
+ auto written = detail::dword();
+ if (!detail::WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)),
+ u16.c_str(), static_cast<uint32_t>(u16.size()),
+ &written, nullptr)) {
FMT_THROW(format_error("failed to write to console"));
}
return;
@@ -1446,8 +2798,4 @@ FMT_FUNC void vprint(string_view format_str, format_args args) {
FMT_END_NAMESPACE
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
-
#endif // FMT_FORMAT_INL_H_
diff --git a/src/third_party/fmt/format.h b/src/third_party/fmt/format.h
index 17509b7..1a037b0 100644
--- a/src/third_party/fmt/format.h
+++ b/src/third_party/fmt/format.h
@@ -70,9 +70,11 @@
#endif
#if __cplusplus == 201103L || __cplusplus == 201402L
-# if defined(__clang__)
+# if defined(__INTEL_COMPILER) || defined(__PGI)
+# define FMT_FALLTHROUGH
+# elif defined(__clang__)
# define FMT_FALLTHROUGH [[clang::fallthrough]]
-# elif FMT_GCC_VERSION >= 700 && !defined(__PGI) && \
+# elif FMT_GCC_VERSION >= 700 && \
(!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520)
# define FMT_FALLTHROUGH [[gnu::fallthrough]]
# else
@@ -139,12 +141,13 @@ FMT_END_NAMESPACE
#endif
#ifndef FMT_USE_UDL_TEMPLATE
-// EDG frontend based compilers (icc, nvcc, etc) and GCC < 6.4 do not properly
-// support UDL templates and GCC >= 9 warns about them.
+// EDG frontend based compilers (icc, nvcc, PGI, etc) and GCC < 6.4 do not
+// properly support UDL templates and GCC >= 9 warns about them.
# if FMT_USE_USER_DEFINED_LITERALS && \
(!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 501) && \
((FMT_GCC_VERSION >= 604 && __cplusplus >= 201402L) || \
- FMT_CLANG_VERSION >= 304)
+ FMT_CLANG_VERSION >= 304) && \
+ !defined(__PGI) && !defined(__NVCC__)
# define FMT_USE_UDL_TEMPLATE 1
# else
# define FMT_USE_UDL_TEMPLATE 0
@@ -163,6 +166,14 @@ FMT_END_NAMESPACE
# define FMT_USE_LONG_DOUBLE 1
#endif
+// Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of
+// int_writer template instances to just one by only using the largest integer
+// type. This results in a reduction in binary size but will cause a decrease in
+// integer formatting performance.
+#if !defined(FMT_REDUCE_INT_INSTANTIATIONS)
+# define FMT_REDUCE_INT_INSTANTIATIONS 0
+#endif
+
// __builtin_clz is broken in clang with Microsoft CodeGen:
// https://github.com/fmtlib/fmt/issues/519
#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER
@@ -171,56 +182,87 @@ FMT_END_NAMESPACE
#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER
# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n)
#endif
+#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctz))
+# define FMT_BUILTIN_CTZ(n) __builtin_ctz(n)
+#endif
+#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctzll))
+# define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n)
+#endif
+
+#if FMT_MSC_VER
+# include <intrin.h> // _BitScanReverse[64], _BitScanForward[64], _umul128
+#endif
// Some compilers masquerade as both MSVC and GCC-likes or otherwise support
// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the
// MSVC intrinsics if the clz and clzll builtins are not available.
-#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED)
-# include <intrin.h> // _BitScanReverse, _BitScanReverse64
-
+#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && \
+ !defined(FMT_BUILTIN_CTZLL) && !defined(_MANAGED)
FMT_BEGIN_NAMESPACE
namespace detail {
// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning.
# ifndef __clang__
+# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# endif
-inline uint32_t clz(uint32_t x) {
+# if defined(_WIN64) && !defined(__clang__)
+# pragma intrinsic(_BitScanForward64)
+# pragma intrinsic(_BitScanReverse64)
+# endif
+
+inline int clz(uint32_t x) {
unsigned long r = 0;
_BitScanReverse(&r, x);
-
FMT_ASSERT(x != 0, "");
// Static analysis complains about using uninitialized data
// "r", but the only way that can happen is if "x" is 0,
// which the callers guarantee to not happen.
FMT_SUPPRESS_MSC_WARNING(6102)
- return 31 - r;
+ return 31 ^ static_cast<int>(r);
}
# define FMT_BUILTIN_CLZ(n) detail::clz(n)
-# if defined(_WIN64) && !defined(__clang__)
-# pragma intrinsic(_BitScanReverse64)
-# endif
-
-inline uint32_t clzll(uint64_t x) {
+inline int clzll(uint64_t x) {
unsigned long r = 0;
# ifdef _WIN64
_BitScanReverse64(&r, x);
# else
// Scan the high 32 bits.
- if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32))) return 63 - (r + 32);
-
+ if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32))) return 63 ^ (r + 32);
// Scan the low 32 bits.
_BitScanReverse(&r, static_cast<uint32_t>(x));
# endif
-
FMT_ASSERT(x != 0, "");
- // Static analysis complains about using uninitialized data
- // "r", but the only way that can happen is if "x" is 0,
- // which the callers guarantee to not happen.
- FMT_SUPPRESS_MSC_WARNING(6102)
- return 63 - r;
+ FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning.
+ return 63 ^ static_cast<int>(r);
}
# define FMT_BUILTIN_CLZLL(n) detail::clzll(n)
+
+inline int ctz(uint32_t x) {
+ unsigned long r = 0;
+ _BitScanForward(&r, x);
+ FMT_ASSERT(x != 0, "");
+ FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning.
+ return static_cast<int>(r);
+}
+# define FMT_BUILTIN_CTZ(n) detail::ctz(n)
+
+inline int ctzll(uint64_t x) {
+ unsigned long r = 0;
+ FMT_ASSERT(x != 0, "");
+ FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning.
+# ifdef _WIN64
+ _BitScanForward64(&r, x);
+# else
+ // Scan the low 32 bits.
+ if (_BitScanForward(&r, static_cast<uint32_t>(x))) return static_cast<int>(r);
+ // Scan the high 32 bits.
+ _BitScanForward(&r, static_cast<uint32_t>(x >> 32));
+ r += 32;
+# endif
+ return static_cast<int>(r);
+}
+# define FMT_BUILTIN_CTZLL(n) detail::ctzll(n)
} // namespace detail
FMT_END_NAMESPACE
#endif
@@ -298,50 +340,11 @@ FMT_INLINE void assume(bool condition) {
#endif
}
-// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
-template <typename... Ts> struct void_t_impl { using type = void; };
-
-template <typename... Ts>
-using void_t = typename detail::void_t_impl<Ts...>::type;
-
// An approximation of iterator_t for pre-C++20 systems.
template <typename T>
using iterator_t = decltype(std::begin(std::declval<T&>()));
template <typename T> using sentinel_t = decltype(std::end(std::declval<T&>()));
-// Detect the iterator category of *any* given type in a SFINAE-friendly way.
-// Unfortunately, older implementations of std::iterator_traits are not safe
-// for use in a SFINAE-context.
-template <typename It, typename Enable = void>
-struct iterator_category : std::false_type {};
-
-template <typename T> struct iterator_category<T*> {
- using type = std::random_access_iterator_tag;
-};
-
-template <typename It>
-struct iterator_category<It, void_t<typename It::iterator_category>> {
- using type = typename It::iterator_category;
-};
-
-// Detect if *any* given type models the OutputIterator concept.
-template <typename It> class is_output_iterator {
- // Check for mutability because all iterator categories derived from
- // std::input_iterator_tag *may* also meet the requirements of an
- // OutputIterator, thereby falling into the category of 'mutable iterators'
- // [iterator.requirements.general] clause 4. The compiler reveals this
- // property only at the point of *actually dereferencing* the iterator!
- template <typename U>
- static decltype(*(std::declval<U>())) test(std::input_iterator_tag);
- template <typename U> static char& test(std::output_iterator_tag);
- template <typename U> static const char& test(...);
-
- using type = decltype(test<It>(typename iterator_category<It>::type{}));
-
- public:
- enum { value = !std::is_const<remove_reference_t<type>>::value };
-};
-
// A workaround for std::string not having mutable data() until C++17.
template <typename Char> inline Char* get_data(std::basic_string<Char>& s) {
return &s[0];
@@ -374,10 +377,29 @@ reserve(std::back_insert_iterator<Container> it, size_t n) {
return make_checked(get_data(c) + size, n);
}
+template <typename T>
+inline buffer_appender<T> reserve(buffer_appender<T> it, size_t n) {
+ buffer<T>& buf = get_container(it);
+ buf.try_reserve(buf.size() + n);
+ return it;
+}
+
template <typename Iterator> inline Iterator& reserve(Iterator& it, size_t) {
return it;
}
+template <typename T, typename OutputIt>
+constexpr T* to_pointer(OutputIt, size_t) {
+ return nullptr;
+}
+template <typename T> T* to_pointer(buffer_appender<T> it, size_t n) {
+ buffer<T>& buf = get_container(it);
+ auto size = buf.size();
+ if (buf.capacity() < size + n) return nullptr;
+ buf.try_resize(size + n);
+ return buf.data() + size;
+}
+
template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
inline std::back_insert_iterator<Container> base_iterator(
std::back_insert_iterator<Container>& it,
@@ -415,13 +437,17 @@ class counting_iterator {
++count_;
return *this;
}
-
counting_iterator operator++(int) {
auto it = *this;
++*this;
return it;
}
+ friend counting_iterator operator+(counting_iterator it, difference_type n) {
+ it.count_ += static_cast<size_t>(n);
+ return it;
+ }
+
value_type operator*() const { return {}; }
};
@@ -555,23 +581,38 @@ OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) {
[](char c) { return static_cast<char8_type>(c); });
}
-#ifndef FMT_USE_GRISU
-# define FMT_USE_GRISU 1
-#endif
-
-template <typename T> constexpr bool use_grisu() {
- return FMT_USE_GRISU && std::numeric_limits<double>::is_iec559 &&
- sizeof(T) <= sizeof(double);
+template <typename Char, typename InputIt>
+inline counting_iterator copy_str(InputIt begin, InputIt end,
+ counting_iterator it) {
+ return it + (end - begin);
}
template <typename T>
+using is_fast_float = bool_constant<std::numeric_limits<T>::is_iec559 &&
+ sizeof(T) <= sizeof(double)>;
+
+#ifndef FMT_USE_FULL_CACHE_DRAGONBOX
+# define FMT_USE_FULL_CACHE_DRAGONBOX 0
+#endif
+
+template <typename T>
template <typename U>
void buffer<T>::append(const U* begin, const U* end) {
- size_t new_size = size_ + to_unsigned(end - begin);
- reserve(new_size);
- std::uninitialized_copy(begin, end,
- make_checked(ptr_ + size_, capacity_ - size_));
- size_ = new_size;
+ do {
+ auto count = to_unsigned(end - begin);
+ try_reserve(size_ + count);
+ auto free_cap = capacity_ - size_;
+ if (free_cap < count) count = free_cap;
+ std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count));
+ size_ += count;
+ begin += count;
+ } while (begin != end);
+}
+
+template <typename OutputIt, typename T, typename Traits>
+void iterator_buffer<OutputIt, T, Traits>::flush() {
+ out_ = std::copy_n(data_, this->limit(this->size()), out_);
+ this->clear();
}
} // namespace detail
@@ -610,7 +651,7 @@ enum { inline_buffer_size = 500 };
*/
template <typename T, size_t SIZE = inline_buffer_size,
typename Allocator = std::allocator<T>>
-class basic_memory_buffer : public detail::buffer<T> {
+class basic_memory_buffer final : public detail::buffer<T> {
private:
T store_[SIZE];
@@ -624,7 +665,7 @@ class basic_memory_buffer : public detail::buffer<T> {
}
protected:
- void grow(size_t size) FMT_OVERRIDE;
+ void grow(size_t size) final FMT_OVERRIDE;
public:
using value_type = T;
@@ -634,7 +675,7 @@ class basic_memory_buffer : public detail::buffer<T> {
: alloc_(alloc) {
this->set(store_, SIZE);
}
- ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); }
+ ~basic_memory_buffer() { deallocate(); }
private:
// Move data from other to this buffer.
@@ -678,6 +719,22 @@ class basic_memory_buffer : public detail::buffer<T> {
// Returns a copy of the allocator associated with this buffer.
Allocator get_allocator() const { return alloc_; }
+
+ /**
+ Resizes the buffer to contain *count* elements. If T is a POD type new
+ elements may not be initialized.
+ */
+ void resize(size_t count) { this->try_resize(count); }
+
+ /** Increases the buffer capacity to *new_capacity*. */
+ void reserve(size_t new_capacity) { this->try_reserve(new_capacity); }
+
+ // Directly append data into the buffer
+ using detail::buffer<T>::append;
+ template <typename ContiguousRange>
+ void append(const ContiguousRange& range) {
+ append(range.data(), range.data() + range.size());
+ }
};
template <typename T, size_t SIZE, typename Allocator>
@@ -748,19 +805,81 @@ FMT_CONSTEXPR bool is_supported_floating_point(T) {
}
// Smallest of uint32_t, uint64_t, uint128_t that is large enough to
-// represent all values of T.
+// represent all values of an integral type T.
template <typename T>
using uint32_or_64_or_128_t =
- conditional_t<num_bits<T>() <= 32, uint32_t,
+ conditional_t<num_bits<T>() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS,
+ uint32_t,
conditional_t<num_bits<T>() <= 64, uint64_t, uint128_t>>;
+// 128-bit integer type used internally
+struct FMT_EXTERN_TEMPLATE_API uint128_wrapper {
+ uint128_wrapper() = default;
+
+#if FMT_USE_INT128
+ uint128_t internal_;
+
+ uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT
+ : internal_{static_cast<uint128_t>(low) |
+ (static_cast<uint128_t>(high) << 64)} {}
+
+ uint128_wrapper(uint128_t u) : internal_{u} {}
+
+ uint64_t high() const FMT_NOEXCEPT { return uint64_t(internal_ >> 64); }
+ uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); }
+
+ uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT {
+ internal_ += n;
+ return *this;
+ }
+#else
+ uint64_t high_;
+ uint64_t low_;
+
+ uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : high_{high},
+ low_{low} {}
+
+ uint64_t high() const FMT_NOEXCEPT { return high_; }
+ uint64_t low() const FMT_NOEXCEPT { return low_; }
+
+ uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT {
+# if defined(_MSC_VER) && defined(_M_X64)
+ unsigned char carry = _addcarry_u64(0, low_, n, &low_);
+ _addcarry_u64(carry, high_, 0, &high_);
+ return *this;
+# else
+ uint64_t sum = low_ + n;
+ high_ += (sum < low_ ? 1 : 0);
+ low_ = sum;
+ return *this;
+# endif
+ }
+#endif
+};
+
+// Table entry type for divisibility test used internally
+template <typename T> struct FMT_EXTERN_TEMPLATE_API divtest_table_entry {
+ T mod_inv;
+ T max_quotient;
+};
+
// Static data is placed in this class template for the header-only config.
template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data {
static const uint64_t powers_of_10_64[];
- static const uint32_t zero_or_powers_of_10_32[];
- static const uint64_t zero_or_powers_of_10_64[];
- static const uint64_t pow10_significands[];
- static const int16_t pow10_exponents[];
+ static const uint32_t zero_or_powers_of_10_32_new[];
+ static const uint64_t zero_or_powers_of_10_64_new[];
+ static const uint64_t grisu_pow10_significands[];
+ static const int16_t grisu_pow10_exponents[];
+ static const divtest_table_entry<uint32_t> divtest_table_for_pow5_32[];
+ static const divtest_table_entry<uint64_t> divtest_table_for_pow5_64[];
+ static const uint64_t dragonbox_pow10_significands_64[];
+ static const uint128_wrapper dragonbox_pow10_significands_128[];
+ // log10(2) = 0x0.4d104d427de7fbcc...
+ static const uint64_t log10_2_significand = 0x4d104d427de7fbcc;
+#if !FMT_USE_FULL_CACHE_DRAGONBOX
+ static const uint64_t powers_of_5_64[];
+ static const uint32_t dragonbox_pow10_recovery_errors[];
+#endif
// GCC generates slightly better code for pairs than chars.
using digit_pair = char[2];
static const digit_pair digits[];
@@ -772,8 +891,23 @@ template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data {
static const char signs[];
static const char left_padding_shifts[5];
static const char right_padding_shifts[5];
+
+ // DEPRECATED! These are for ABI compatibility.
+ static const uint32_t zero_or_powers_of_10_32[];
+ static const uint64_t zero_or_powers_of_10_64[];
};
+// Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)).
+// This is a function instead of an array to workaround a bug in GCC10 (#1810).
+FMT_INLINE uint16_t bsr2log10(int bsr) {
+ static constexpr uint16_t data[] = {
+ 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5,
+ 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10,
+ 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
+ 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20};
+ return data[bsr];
+}
+
#ifndef FMT_EXPORTED
FMT_EXTERN template struct basic_data<void>;
#endif
@@ -785,10 +919,9 @@ struct data : basic_data<> {};
// Returns the number of decimal digits in n. Leading zeros are not counted
// except for n == 0 in which case count_digits returns 1.
inline int count_digits(uint64_t n) {
- // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits.
- int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12;
- return t - (n < data::zero_or_powers_of_10_64[t]) + 1;
+ // https://github.com/fmtlib/format-benchmark/blob/master/digits10
+ auto t = bsr2log10(FMT_BUILTIN_CLZLL(n | 1) ^ 63);
+ return t - (n < data::zero_or_powers_of_10_64_new[t]);
}
#else
// Fallback version of count_digits used when __builtin_clz is not available.
@@ -838,15 +971,24 @@ template <> int count_digits<4>(detail::fallback_uintptr n);
#if FMT_GCC_VERSION || FMT_CLANG_VERSION
# define FMT_ALWAYS_INLINE inline __attribute__((always_inline))
+#elif FMT_MSC_VER
+# define FMT_ALWAYS_INLINE __forceinline
#else
-# define FMT_ALWAYS_INLINE
+# define FMT_ALWAYS_INLINE inline
+#endif
+
+// To suppress unnecessary security cookie checks
+#if FMT_MSC_VER && !FMT_CLANG_VERSION
+# define FMT_SAFEBUFFERS __declspec(safebuffers)
+#else
+# define FMT_SAFEBUFFERS
#endif
#ifdef FMT_BUILTIN_CLZ
// Optional version of count_digits for better performance on 32-bit platforms.
inline int count_digits(uint32_t n) {
- int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12;
- return t - (n < data::zero_or_powers_of_10_32[t]) + 1;
+ auto t = bsr2log10(FMT_BUILTIN_CLZ(n | 1) ^ 31);
+ return t - (n < data::zero_or_powers_of_10_32_new[t]);
}
#endif
@@ -893,7 +1035,7 @@ template <typename Char> void copy2(Char* dst, const char* src) {
*dst++ = static_cast<Char>(*src++);
*dst = static_cast<Char>(*src);
}
-inline void copy2(char* dst, const char* src) { memcpy(dst, src, 2); }
+FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); }
template <typename Iterator> struct format_decimal_result {
Iterator begin;
@@ -929,11 +1071,10 @@ inline format_decimal_result<Char*> format_decimal(Char* out, UInt value,
template <typename Char, typename UInt, typename Iterator,
FMT_ENABLE_IF(!std::is_pointer<remove_cvref_t<Iterator>>::value)>
inline format_decimal_result<Iterator> format_decimal(Iterator out, UInt value,
- int num_digits) {
- // Buffer should be large enough to hold all digits (<= digits10 + 1).
- enum { max_size = digits10<UInt>() + 1 };
- Char buffer[2 * max_size];
- auto end = format_decimal(buffer, value, num_digits).end;
+ int size) {
+ // Buffer is large enough to hold all digits (digits10 + 1).
+ Char buffer[digits10<UInt>() + 1];
+ auto end = format_decimal(buffer, value, size).end;
return {out, detail::copy_str<Char>(buffer, end, out)};
}
@@ -975,6 +1116,10 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits,
template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
+ if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
+ format_uint<BASE_BITS>(ptr, value, num_digits, upper);
+ return out;
+ }
// Buffer should be large enough to hold all digits (digits / BASE_BITS + 1).
char buffer[num_bits<UInt>() / BASE_BITS + 1];
format_uint<BASE_BITS>(buffer, value, num_digits, upper);
@@ -1000,8 +1145,8 @@ template <typename T = void> struct null {};
template <typename Char> struct fill_t {
private:
enum { max_size = 4 };
- Char data_[max_size];
- unsigned char size_;
+ Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)};
+ unsigned char size_ = 1;
public:
FMT_CONSTEXPR void operator=(basic_string_view<Char> s) {
@@ -1021,13 +1166,6 @@ template <typename Char> struct fill_t {
FMT_CONSTEXPR const Char& operator[](size_t index) const {
return data_[index];
}
-
- static FMT_CONSTEXPR fill_t<Char> make() {
- auto fill = fill_t<Char>();
- fill[0] = Char(' ');
- fill.size_ = 1;
- return fill;
- }
};
} // namespace detail
@@ -1059,13 +1197,84 @@ template <typename Char> struct basic_format_specs {
type(0),
align(align::none),
sign(sign::none),
- alt(false),
- fill(detail::fill_t<Char>::make()) {}
+ alt(false) {}
};
using format_specs = basic_format_specs<char>;
namespace detail {
+namespace dragonbox {
+
+// Type-specific information that Dragonbox uses.
+template <class T> struct float_info;
+
+template <> struct float_info<float> {
+ using carrier_uint = uint32_t;
+ static const int significand_bits = 23;
+ static const int exponent_bits = 8;
+ static const int min_exponent = -126;
+ static const int max_exponent = 127;
+ static const int exponent_bias = -127;
+ static const int decimal_digits = 9;
+ static const int kappa = 1;
+ static const int big_divisor = 100;
+ static const int small_divisor = 10;
+ static const int min_k = -31;
+ static const int max_k = 46;
+ static const int cache_bits = 64;
+ static const int divisibility_check_by_5_threshold = 39;
+ static const int case_fc_pm_half_lower_threshold = -1;
+ static const int case_fc_pm_half_upper_threshold = 6;
+ static const int case_fc_lower_threshold = -2;
+ static const int case_fc_upper_threshold = 6;
+ static const int case_shorter_interval_left_endpoint_lower_threshold = 2;
+ static const int case_shorter_interval_left_endpoint_upper_threshold = 3;
+ static const int shorter_interval_tie_lower_threshold = -35;
+ static const int shorter_interval_tie_upper_threshold = -35;
+ static const int max_trailing_zeros = 7;
+};
+
+template <> struct float_info<double> {
+ using carrier_uint = uint64_t;
+ static const int significand_bits = 52;
+ static const int exponent_bits = 11;
+ static const int min_exponent = -1022;
+ static const int max_exponent = 1023;
+ static const int exponent_bias = -1023;
+ static const int decimal_digits = 17;
+ static const int kappa = 2;
+ static const int big_divisor = 1000;
+ static const int small_divisor = 100;
+ static const int min_k = -292;
+ static const int max_k = 326;
+ static const int cache_bits = 128;
+ static const int divisibility_check_by_5_threshold = 86;
+ static const int case_fc_pm_half_lower_threshold = -2;
+ static const int case_fc_pm_half_upper_threshold = 9;
+ static const int case_fc_lower_threshold = -4;
+ static const int case_fc_upper_threshold = 9;
+ static const int case_shorter_interval_left_endpoint_lower_threshold = 2;
+ static const int case_shorter_interval_left_endpoint_upper_threshold = 3;
+ static const int shorter_interval_tie_lower_threshold = -77;
+ static const int shorter_interval_tie_upper_threshold = -77;
+ static const int max_trailing_zeros = 16;
+};
+
+template <typename T> struct decimal_fp {
+ using significand_type = typename float_info<T>::carrier_uint;
+ significand_type significand;
+ int exponent;
+};
+
+template <typename T> FMT_API decimal_fp<T> to_decimal(T x) FMT_NOEXCEPT;
+} // namespace dragonbox
+
+template <typename T>
+constexpr typename dragonbox::float_info<T>::carrier_uint exponent_mask() {
+ using uint = typename dragonbox::float_info<T>::carrier_uint;
+ return ((uint(1) << dragonbox::float_info<T>::exponent_bits) - 1)
+ << dragonbox::float_info<T>::significand_bits;
+}
// A floating-point presentation format.
enum class float_format : unsigned char {
@@ -1107,113 +1316,6 @@ template <typename Char, typename It> It write_exponent(int exp, It it) {
return it;
}
-template <typename Char> class float_writer {
- private:
- // The number is given as v = digits_ * pow(10, exp_).
- const char* digits_;
- int num_digits_;
- int exp_;
- size_t size_;
- float_specs specs_;
- Char decimal_point_;
-
- template <typename It> It prettify(It it) const {
- // pow(10, full_exp - 1) <= v <= pow(10, full_exp).
- int full_exp = num_digits_ + exp_;
- if (specs_.format == float_format::exp) {
- // Insert a decimal point after the first digit and add an exponent.
- *it++ = static_cast<Char>(*digits_);
- int num_zeros = specs_.precision - num_digits_;
- if (num_digits_ > 1 || specs_.showpoint) *it++ = decimal_point_;
- it = copy_str<Char>(digits_ + 1, digits_ + num_digits_, it);
- if (num_zeros > 0 && specs_.showpoint)
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- *it++ = static_cast<Char>(specs_.upper ? 'E' : 'e');
- return write_exponent<Char>(full_exp - 1, it);
- }
- if (num_digits_ <= full_exp) {
- // 1234e7 -> 12340000000[.0+]
- it = copy_str<Char>(digits_, digits_ + num_digits_, it);
- it = std::fill_n(it, full_exp - num_digits_, static_cast<Char>('0'));
- if (specs_.showpoint || specs_.precision < 0) {
- *it++ = decimal_point_;
- int num_zeros = specs_.precision - full_exp;
- if (num_zeros <= 0) {
- if (specs_.format != float_format::fixed)
- *it++ = static_cast<Char>('0');
- return it;
- }
-#ifdef FMT_FUZZ
- if (num_zeros > 5000)
- throw std::runtime_error("fuzz mode - avoiding excessive cpu use");
-#endif
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- }
- } else if (full_exp > 0) {
- // 1234e-2 -> 12.34[0+]
- it = copy_str<Char>(digits_, digits_ + full_exp, it);
- if (!specs_.showpoint) {
- // Remove trailing zeros.
- int num_digits = num_digits_;
- while (num_digits > full_exp && digits_[num_digits - 1] == '0')
- --num_digits;
- if (num_digits != full_exp) *it++ = decimal_point_;
- return copy_str<Char>(digits_ + full_exp, digits_ + num_digits, it);
- }
- *it++ = decimal_point_;
- it = copy_str<Char>(digits_ + full_exp, digits_ + num_digits_, it);
- if (specs_.precision > num_digits_) {
- // Add trailing zeros.
- int num_zeros = specs_.precision - num_digits_;
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- }
- } else {
- // 1234e-6 -> 0.001234
- *it++ = static_cast<Char>('0');
- int num_zeros = -full_exp;
- int num_digits = num_digits_;
- if (num_digits == 0 && specs_.precision >= 0 &&
- specs_.precision < num_zeros) {
- num_zeros = specs_.precision;
- }
- // Remove trailing zeros.
- if (!specs_.showpoint)
- while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits;
- if (num_zeros != 0 || num_digits != 0 || specs_.showpoint) {
- *it++ = decimal_point_;
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- it = copy_str<Char>(digits_, digits_ + num_digits, it);
- }
- }
- return it;
- }
-
- public:
- float_writer(const char* digits, int num_digits, int exp, float_specs specs,
- Char decimal_point)
- : digits_(digits),
- num_digits_(num_digits),
- exp_(exp),
- specs_(specs),
- decimal_point_(decimal_point) {
- int full_exp = num_digits + exp - 1;
- int precision = specs.precision > 0 ? specs.precision : 16;
- if (specs_.format == float_format::general &&
- !(full_exp >= -4 && full_exp < precision)) {
- specs_.format = float_format::exp;
- }
- size_ = prettify(counting_iterator()).count();
- size_ += specs.sign ? 1 : 0;
- }
-
- size_t size() const { return size_; }
-
- template <typename It> It operator()(It it) const {
- if (specs_.sign) *it++ = static_cast<Char>(data::signs[specs_.sign]);
- return prettify(it);
- }
-};
-
template <typename T>
int format_float(T value, int precision, float_specs specs, buffer<char>& buf);
@@ -1392,7 +1494,7 @@ template <align::type align = align::left, typename OutputIt, typename Char,
typename F>
inline OutputIt write_padded(OutputIt out,
const basic_format_specs<Char>& specs, size_t size,
- size_t width, const F& f) {
+ size_t width, F&& f) {
static_assert(align == align::left || align == align::right, "");
unsigned spec_width = to_unsigned(specs.width);
size_t padding = spec_width > width ? spec_width - width : 0;
@@ -1410,7 +1512,7 @@ template <align::type align = align::left, typename OutputIt, typename Char,
typename F>
inline OutputIt write_padded(OutputIt out,
const basic_format_specs<Char>& specs, size_t size,
- const F& f) {
+ F&& f) {
return write_padded<align>(out, specs, size, size, f);
}
@@ -1577,15 +1679,16 @@ template <typename OutputIt, typename Char, typename UInt> struct int_writer {
char digits[40];
format_decimal(digits, abs_value, num_digits);
basic_memory_buffer<Char> buffer;
- size += prefix_size;
- buffer.resize(size);
+ size += static_cast<int>(prefix_size);
+ const auto usize = to_unsigned(size);
+ buffer.resize(usize);
basic_string_view<Char> s(&sep, sep_size);
// Index of a decimal digit with the least significant digit having index 0.
int digit_index = 0;
group = groups.cbegin();
- auto p = buffer.data() + size;
- for (int i = num_digits - 1; i >= 0; --i) {
- *--p = static_cast<Char>(digits[i]);
+ auto p = buffer.data() + size - 1;
+ for (int i = num_digits - 1; i > 0; --i) {
+ *p-- = static_cast<Char>(digits[i]);
if (*group <= 0 || ++digit_index % *group != 0 ||
*group == max_value<char>())
continue;
@@ -1593,16 +1696,16 @@ template <typename OutputIt, typename Char, typename UInt> struct int_writer {
digit_index = 0;
++group;
}
- p -= s.size();
std::uninitialized_copy(s.data(), s.data() + s.size(),
make_checked(p, s.size()));
+ p -= s.size();
}
- if (prefix_size != 0) p[-1] = static_cast<Char>('-');
- using iterator = remove_reference_t<decltype(reserve(out, 0))>;
+ *p-- = static_cast<Char>(*digits);
+ if (prefix_size != 0) *p = static_cast<Char>('-');
auto data = buffer.data();
- out = write_padded<align::right>(out, specs, size, size, [=](iterator it) {
- return copy_str<Char>(data, data + size, it);
- });
+ out = write_padded<align::right>(
+ out, specs, usize, usize,
+ [=](iterator it) { return copy_str<Char>(data, data + size, it); });
}
void on_chr() { *out++ = static_cast<Char>(abs_value); }
@@ -1628,6 +1731,168 @@ OutputIt write_nonfinite(OutputIt out, bool isinf,
});
}
+// A decimal floating-point number significand * pow(10, exp).
+struct big_decimal_fp {
+ const char* significand;
+ int significand_size;
+ int exponent;
+};
+
+inline int get_significand_size(const big_decimal_fp& fp) {
+ return fp.significand_size;
+}
+template <typename T>
+inline int get_significand_size(const dragonbox::decimal_fp<T>& fp) {
+ return count_digits(fp.significand);
+}
+
+template <typename Char, typename OutputIt>
+inline OutputIt write_significand(OutputIt out, const char* significand,
+ int& significand_size) {
+ return copy_str<Char>(significand, significand + significand_size, out);
+}
+template <typename Char, typename OutputIt, typename UInt>
+inline OutputIt write_significand(OutputIt out, UInt significand,
+ int significand_size) {
+ return format_decimal<Char>(out, significand, significand_size).end;
+}
+
+template <typename Char, typename UInt,
+ FMT_ENABLE_IF(std::is_integral<UInt>::value)>
+inline Char* write_significand(Char* out, UInt significand,
+ int significand_size, int integral_size,
+ Char decimal_point) {
+ if (!decimal_point)
+ return format_decimal(out, significand, significand_size).end;
+ auto end = format_decimal(out + 1, significand, significand_size).end;
+ if (integral_size == 1)
+ out[0] = out[1];
+ else
+ std::copy_n(out + 1, integral_size, out);
+ out[integral_size] = decimal_point;
+ return end;
+}
+
+template <typename OutputIt, typename UInt, typename Char,
+ FMT_ENABLE_IF(!std::is_pointer<remove_cvref_t<OutputIt>>::value)>
+inline OutputIt write_significand(OutputIt out, UInt significand,
+ int significand_size, int integral_size,
+ Char decimal_point) {
+ // Buffer is large enough to hold digits (digits10 + 1) and a decimal point.
+ Char buffer[digits10<UInt>() + 2];
+ auto end = write_significand(buffer, significand, significand_size,
+ integral_size, decimal_point);
+ return detail::copy_str<Char>(buffer, end, out);
+}
+
+template <typename OutputIt, typename Char>
+inline OutputIt write_significand(OutputIt out, const char* significand,
+ int significand_size, int integral_size,
+ Char decimal_point) {
+ out = detail::copy_str<Char>(significand, significand + integral_size, out);
+ if (!decimal_point) return out;
+ *out++ = decimal_point;
+ return detail::copy_str<Char>(significand + integral_size,
+ significand + significand_size, out);
+}
+
+template <typename OutputIt, typename DecimalFP, typename Char>
+OutputIt write_float(OutputIt out, const DecimalFP& fp,
+ const basic_format_specs<Char>& specs, float_specs fspecs,
+ Char decimal_point) {
+ auto significand = fp.significand;
+ int significand_size = get_significand_size(fp);
+ static const Char zero = static_cast<Char>('0');
+ auto sign = fspecs.sign;
+ size_t size = to_unsigned(significand_size) + (sign ? 1 : 0);
+ using iterator = remove_reference_t<decltype(reserve(out, 0))>;
+
+ int output_exp = fp.exponent + significand_size - 1;
+ auto use_exp_format = [=]() {
+ if (fspecs.format == float_format::exp) return true;
+ if (fspecs.format != float_format::general) return false;
+ // Use the fixed notation if the exponent is in [exp_lower, exp_upper),
+ // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation.
+ const int exp_lower = -4, exp_upper = 16;
+ return output_exp < exp_lower ||
+ output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper);
+ };
+ if (use_exp_format()) {
+ int num_zeros = 0;
+ if (fspecs.showpoint) {
+ num_zeros = (std::max)(fspecs.precision - significand_size, 0);
+ size += to_unsigned(num_zeros);
+ } else if (significand_size == 1) {
+ decimal_point = Char();
+ }
+ auto abs_output_exp = output_exp >= 0 ? output_exp : -output_exp;
+ int exp_digits = 2;
+ if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3;
+
+ size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits);
+ char exp_char = fspecs.upper ? 'E' : 'e';
+ auto write = [=](iterator it) {
+ if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+ // Insert a decimal point after the first digit and add an exponent.
+ it = write_significand(it, significand, significand_size, 1,
+ decimal_point);
+ if (num_zeros > 0) it = std::fill_n(it, num_zeros, zero);
+ *it++ = static_cast<Char>(exp_char);
+ return write_exponent<Char>(output_exp, it);
+ };
+ return specs.width > 0 ? write_padded<align::right>(out, specs, size, write)
+ : base_iterator(out, write(reserve(out, size)));
+ }
+
+ int exp = fp.exponent + significand_size;
+ if (fp.exponent >= 0) {
+ // 1234e5 -> 123400000[.0+]
+ size += to_unsigned(fp.exponent);
+ int num_zeros = fspecs.precision - exp;
+#ifdef FMT_FUZZ
+ if (num_zeros > 5000)
+ throw std::runtime_error("fuzz mode - avoiding excessive cpu use");
+#endif
+ if (fspecs.showpoint) {
+ if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1;
+ if (num_zeros > 0) size += to_unsigned(num_zeros);
+ }
+ return write_padded<align::right>(out, specs, size, [&](iterator it) {
+ if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+ it = write_significand<Char>(it, significand, significand_size);
+ it = std::fill_n(it, fp.exponent, zero);
+ if (!fspecs.showpoint) return it;
+ *it++ = decimal_point;
+ return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it;
+ });
+ } else if (exp > 0) {
+ // 1234e-2 -> 12.34[0+]
+ int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0;
+ size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0);
+ return write_padded<align::right>(out, specs, size, [&](iterator it) {
+ if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+ it = write_significand(it, significand, significand_size, exp,
+ decimal_point);
+ return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it;
+ });
+ }
+ // 1234e-6 -> 0.001234
+ int num_zeros = -exp;
+ if (significand_size == 0 && fspecs.precision >= 0 &&
+ fspecs.precision < num_zeros) {
+ num_zeros = fspecs.precision;
+ }
+ size += 2 + to_unsigned(num_zeros);
+ return write_padded<align::right>(out, specs, size, [&](iterator it) {
+ if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+ *it++ = zero;
+ if (num_zeros == 0 && significand_size == 0 && !fspecs.showpoint) return it;
+ *it++ = decimal_point;
+ it = std::fill_n(it, num_zeros, zero);
+ return write_significand<Char>(it, significand, significand_size);
+ });
+}
+
template <typename Char, typename OutputIt, typename T,
FMT_ENABLE_IF(std::is_floating_point<T>::value)>
OutputIt write(OutputIt out, T value, basic_format_specs<Char> specs,
@@ -1667,39 +1932,45 @@ OutputIt write(OutputIt out, T value, basic_format_specs<Char> specs,
++precision;
}
if (const_check(std::is_same<T, float>())) fspecs.binary32 = true;
- fspecs.use_grisu = use_grisu<T>();
+ fspecs.use_grisu = is_fast_float<T>();
int exp = format_float(promote_float(value), precision, fspecs, buffer);
fspecs.precision = precision;
Char point =
fspecs.locale ? decimal_point<Char>(loc) : static_cast<Char>('.');
- float_writer<Char> w(buffer.data(), static_cast<int>(buffer.size()), exp,
- fspecs, point);
- return write_padded<align::right>(out, specs, w.size(), w);
+ auto fp = big_decimal_fp{buffer.data(), static_cast<int>(buffer.size()), exp};
+ return write_float(out, fp, specs, fspecs, point);
}
template <typename Char, typename OutputIt, typename T,
- FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+ FMT_ENABLE_IF(is_fast_float<T>::value)>
OutputIt write(OutputIt out, T value) {
if (const_check(!is_supported_floating_point(value))) return out;
+
+ using floaty = conditional_t<std::is_same<T, long double>::value, double, T>;
+ using uint = typename dragonbox::float_info<floaty>::carrier_uint;
+ auto bits = bit_cast<uint>(value);
+
auto fspecs = float_specs();
- if (std::signbit(value)) { // value < 0 is false for NaN so use signbit.
+ auto sign_bit = bits & (uint(1) << (num_bits<uint>() - 1));
+ if (sign_bit != 0) {
fspecs.sign = sign::minus;
value = -value;
}
- auto specs = basic_format_specs<Char>();
- if (!std::isfinite(value))
+ static const auto specs = basic_format_specs<Char>();
+ uint mask = exponent_mask<floaty>();
+ if ((bits & mask) == mask)
return write_nonfinite(out, std::isinf(value), specs, fspecs);
- memory_buffer buffer;
- int precision = -1;
- if (const_check(std::is_same<T, float>())) fspecs.binary32 = true;
- fspecs.use_grisu = use_grisu<T>();
- int exp = format_float(promote_float(value), precision, fspecs, buffer);
- fspecs.precision = precision;
- float_writer<Char> w(buffer.data(), static_cast<int>(buffer.size()), exp,
- fspecs, static_cast<Char>('.'));
- return base_iterator(out, w(reserve(out, w.size())));
+ auto dec = dragonbox::to_decimal(static_cast<floaty>(value));
+ return write_float(out, dec, specs, fspecs, static_cast<Char>('.'));
+}
+
+template <typename Char, typename OutputIt, typename T,
+ FMT_ENABLE_IF(std::is_floating_point<T>::value &&
+ !is_fast_float<T>::value)>
+inline OutputIt write(OutputIt out, T value) {
+ return write(out, value, basic_format_specs<Char>());
}
template <typename Char, typename OutputIt>
@@ -1752,6 +2023,13 @@ OutputIt write(OutputIt out, basic_string_view<Char> value) {
return base_iterator(out, it);
}
+template <typename Char>
+buffer_appender<Char> write(buffer_appender<Char> out,
+ basic_string_view<Char> value) {
+ get_container(out).append(value.begin(), value.end());
+ return out;
+}
+
template <typename Char, typename OutputIt, typename T,
FMT_ENABLE_IF(is_integral<T>::value &&
!std::is_same<T, bool>::value &&
@@ -1762,7 +2040,13 @@ OutputIt write(OutputIt out, T value) {
// Don't do -abs_value since it trips unsigned-integer-overflow sanitizer.
if (negative) abs_value = ~abs_value + 1;
int num_digits = count_digits(abs_value);
- auto it = reserve(out, (negative ? 1 : 0) + static_cast<size_t>(num_digits));
+ auto size = (negative ? 1 : 0) + static_cast<size_t>(num_digits);
+ auto it = reserve(out, size);
+ if (auto ptr = to_pointer<Char>(it, size)) {
+ if (negative) *ptr++ = static_cast<Char>('-');
+ format_decimal<Char>(ptr, abs_value, num_digits);
+ return out;
+ }
if (negative) *it++ = static_cast<Char>('-');
it = format_decimal<Char>(it, abs_value, num_digits).end;
return base_iterator(out, it);
@@ -1801,8 +2085,13 @@ auto write(OutputIt out, const T& value) -> typename std::enable_if<
mapped_type_constant<T, basic_format_context<OutputIt, Char>>::value ==
type::custom_type,
OutputIt>::type {
- basic_format_context<OutputIt, Char> ctx(out, {}, {});
- return formatter<T>().format(value, ctx);
+ using context_type = basic_format_context<OutputIt, Char>;
+ using formatter_type =
+ conditional_t<has_formatter<T, context_type>::value,
+ typename context_type::template formatter_type<T>,
+ fallback_formatter<T, Char>>;
+ context_type ctx(out, {}, {});
+ return formatter_type().format(value, ctx);
}
// An argument visitor that formats the argument and writes it via the output
@@ -2008,6 +2297,48 @@ class arg_formatter_base {
}
};
+/** The default argument formatter. */
+template <typename OutputIt, typename Char>
+class arg_formatter : public arg_formatter_base<OutputIt, Char> {
+ private:
+ using char_type = Char;
+ using base = arg_formatter_base<OutputIt, Char>;
+ using context_type = basic_format_context<OutputIt, Char>;
+
+ context_type& ctx_;
+ basic_format_parse_context<char_type>* parse_ctx_;
+ const Char* ptr_;
+
+ public:
+ using iterator = typename base::iterator;
+ using format_specs = typename base::format_specs;
+
+ /**
+ \rst
+ Constructs an argument formatter object.
+ *ctx* is a reference to the formatting context,
+ *specs* contains format specifier information for standard argument types.
+ \endrst
+ */
+ explicit arg_formatter(
+ context_type& ctx,
+ basic_format_parse_context<char_type>* parse_ctx = nullptr,
+ format_specs* specs = nullptr, const Char* ptr = nullptr)
+ : base(ctx.out(), specs, ctx.locale()),
+ ctx_(ctx),
+ parse_ctx_(parse_ctx),
+ ptr_(ptr) {}
+
+ using base::operator();
+
+ /** Formats an argument of a user-defined type. */
+ iterator operator()(typename basic_format_arg<context_type>::handle handle) {
+ if (ptr_) advance_to(*parse_ctx_, ptr_);
+ handle.format(*parse_ctx_, ctx_);
+ return ctx_.out();
+ }
+};
+
template <typename Char> FMT_CONSTEXPR bool is_name_start(Char c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c;
}
@@ -2047,12 +2378,11 @@ template <typename Context> class custom_formatter {
Context& ctx)
: parse_ctx_(parse_ctx), ctx_(ctx) {}
- bool operator()(typename basic_format_arg<Context>::handle h) const {
+ void operator()(typename basic_format_arg<Context>::handle h) const {
h.format(parse_ctx_, ctx_);
- return true;
}
- template <typename T> bool operator()(T) const { return false; }
+ template <typename T> void operator()(T) const {}
};
template <typename T>
@@ -2434,12 +2764,30 @@ template <typename SpecHandler, typename Char> struct precision_adapter {
};
template <typename Char>
-FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) {
- if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1;
- do {
- ++begin;
- } while (begin != end && (*begin & 0xc0) == 0x80);
- return begin;
+FMT_CONSTEXPR int code_point_length(const Char* begin) {
+ if (const_check(sizeof(Char) != 1)) return 1;
+ constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+ int len = lengths[static_cast<unsigned char>(*begin) >> 3];
+
+ // Compute the pointer to the next character early so that the next
+ // iteration can start working on the next character. Neither Clang
+ // nor GCC figure out this reordering on their own.
+ return len + !len;
+}
+
+template <typename Char> constexpr bool is_ascii_letter(Char c) {
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+// Converts a character to ASCII. Returns a number > 127 on conversion failure.
+template <typename Char, FMT_ENABLE_IF(std::is_integral<Char>::value)>
+constexpr Char to_ascii(Char value) {
+ return value;
+}
+template <typename Char, FMT_ENABLE_IF(std::is_enum<Char>::value)>
+constexpr typename std::underlying_type<Char>::type to_ascii(Char value) {
+ return value;
}
// Parses fill and alignment.
@@ -2448,10 +2796,10 @@ FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end,
Handler&& handler) {
FMT_ASSERT(begin != end, "");
auto align = align::none;
- auto p = next_code_point(begin, end);
- if (p == end) p = begin;
+ auto p = begin + code_point_length(begin);
+ if (p >= end) p = begin;
for (;;) {
- switch (static_cast<char>(*p)) {
+ switch (to_ascii(*p)) {
case '<':
align = align::left;
break;
@@ -2530,13 +2878,13 @@ FMT_CONSTEXPR const Char* parse_precision(const Char* begin, const Char* end,
template <typename Char, typename SpecHandler>
FMT_CONSTEXPR const Char* parse_format_specs(const Char* begin, const Char* end,
SpecHandler&& handler) {
- if (begin == end || *begin == '}') return begin;
+ if (begin == end) return begin;
begin = parse_align(begin, end, handler);
if (begin == end) return begin;
// Parse sign.
- switch (static_cast<char>(*begin)) {
+ switch (to_ascii(*begin)) {
case '+':
handler.on_plus();
++begin;
@@ -2613,7 +2961,7 @@ FMT_CONSTEXPR const Char* parse_replacement_field(const Char* begin,
Handler&& handler) {
++begin;
if (begin == end) return handler.on_error("invalid format string"), end;
- if (static_cast<char>(*begin) == '}') {
+ if (*begin == '}') {
handler.on_replacement_field(handler.on_arg_id(), begin);
} else if (*begin == '{') {
handler.on_text(begin, begin + 1);
@@ -2658,17 +3006,17 @@ FMT_CONSTEXPR_DECL FMT_INLINE void parse_format_string(
return;
}
struct writer {
- FMT_CONSTEXPR void operator()(const Char* begin, const Char* end) {
- if (begin == end) return;
+ FMT_CONSTEXPR void operator()(const Char* pbegin, const Char* pend) {
+ if (pbegin == pend) return;
for (;;) {
const Char* p = nullptr;
- if (!find<IS_CONSTEXPR>(begin, end, '}', p))
- return handler_.on_text(begin, end);
+ if (!find<IS_CONSTEXPR>(pbegin, pend, '}', p))
+ return handler_.on_text(pbegin, pend);
++p;
- if (p == end || *p != '}')
+ if (p == pend || *p != '}')
return handler_.on_error("unmatched '}' in format string");
- handler_.on_text(begin, p);
- begin = p + 1;
+ handler_.on_text(pbegin, p);
+ pbegin = p + 1;
}
}
Handler& handler_;
@@ -2699,13 +3047,12 @@ FMT_CONSTEXPR const typename ParseContext::char_type* parse_format_specs(
return f.parse(ctx);
}
-template <typename ArgFormatter, typename Char, typename Context>
+template <typename OutputIt, typename Char, typename Context>
struct format_handler : detail::error_handler {
basic_format_parse_context<Char> parse_context;
Context context;
- format_handler(typename ArgFormatter::iterator out,
- basic_string_view<Char> str,
+ format_handler(OutputIt out, basic_string_view<Char> str,
basic_format_args<Context> format_args, detail::locale_ref loc)
: parse_context(str), context(out, format_args, loc) {}
@@ -2728,26 +3075,33 @@ struct format_handler : detail::error_handler {
FMT_INLINE void on_replacement_field(int id, const Char*) {
auto arg = get_arg(context, id);
context.advance_to(visit_format_arg(
- default_arg_formatter<typename ArgFormatter::iterator, Char>{
- context.out(), context.args(), context.locale()},
+ default_arg_formatter<OutputIt, Char>{context.out(), context.args(),
+ context.locale()},
arg));
}
const Char* on_format_specs(int id, const Char* begin, const Char* end) {
- advance_to(parse_context, begin);
auto arg = get_arg(context, id);
- custom_formatter<Context> f(parse_context, context);
- if (visit_format_arg(f, arg)) return parse_context.begin();
- basic_format_specs<Char> specs;
- using parse_context_t = basic_format_parse_context<Char>;
- specs_checker<specs_handler<parse_context_t, Context>> handler(
- specs_handler<parse_context_t, Context>(specs, parse_context, context),
- arg.type());
- begin = parse_format_specs(begin, end, handler);
- if (begin == end || *begin != '}') on_error("missing '}' in format string");
- advance_to(parse_context, begin);
- context.advance_to(
- visit_format_arg(ArgFormatter(context, &parse_context, &specs), arg));
+ if (arg.type() == type::custom_type) {
+ advance_to(parse_context, begin);
+ visit_format_arg(custom_formatter<Context>(parse_context, context), arg);
+ return parse_context.begin();
+ }
+ auto specs = basic_format_specs<Char>();
+ if (begin + 1 < end && begin[1] == '}' && is_ascii_letter(*begin)) {
+ specs.type = static_cast<char>(*begin++);
+ } else {
+ using parse_context_t = basic_format_parse_context<Char>;
+ specs_checker<specs_handler<parse_context_t, Context>> handler(
+ specs_handler<parse_context_t, Context>(specs, parse_context,
+ context),
+ arg.type());
+ begin = parse_format_specs(begin, end, handler);
+ if (begin == end || *begin != '}')
+ on_error("missing '}' in format string");
+ }
+ context.advance_to(visit_format_arg(
+ arg_formatter<OutputIt, Char>(context, &parse_context, &specs), arg));
return begin;
}
};
@@ -2899,53 +3253,11 @@ FMT_API void format_error_code(buffer<char>& out, int error_code,
FMT_API void report_error(format_func func, int error_code,
string_view message) FMT_NOEXCEPT;
-
-/** The default argument formatter. */
-template <typename OutputIt, typename Char>
-class arg_formatter : public arg_formatter_base<OutputIt, Char> {
- private:
- using char_type = Char;
- using base = arg_formatter_base<OutputIt, Char>;
- using context_type = basic_format_context<OutputIt, Char>;
-
- context_type& ctx_;
- basic_format_parse_context<char_type>* parse_ctx_;
- const Char* ptr_;
-
- public:
- using iterator = typename base::iterator;
- using format_specs = typename base::format_specs;
-
- /**
- \rst
- Constructs an argument formatter object.
- *ctx* is a reference to the formatting context,
- *specs* contains format specifier information for standard argument types.
- \endrst
- */
- explicit arg_formatter(
- context_type& ctx,
- basic_format_parse_context<char_type>* parse_ctx = nullptr,
- format_specs* specs = nullptr, const Char* ptr = nullptr)
- : base(ctx.out(), specs, ctx.locale()),
- ctx_(ctx),
- parse_ctx_(parse_ctx),
- ptr_(ptr) {}
-
- using base::operator();
-
- /** Formats an argument of a user-defined type. */
- iterator operator()(typename basic_format_arg<context_type>::handle handle) {
- if (ptr_) advance_to(*parse_ctx_, ptr_);
- handle.format(*parse_ctx_, ctx_);
- return ctx_.out();
- }
-};
} // namespace detail
template <typename OutputIt, typename Char>
using arg_formatter FMT_DEPRECATED_ALIAS =
- detail::arg_formatter<OutputIt, Char>;
+ detail::arg_formatter<OutputIt, Char>;
/**
An error returned by an operating system or a language runtime,
@@ -3208,8 +3520,10 @@ struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> {
// using variant = std::variant<int, std::string>;
// template <>
// struct formatter<variant>: dynamic_formatter<> {
-// void format(buffer &buf, const variant &v, context &ctx) {
-// visit([&](const auto &val) { format(buf, val, ctx); }, v);
+// auto format(const variant& v, format_context& ctx) {
+// return visit([&](const auto& val) {
+// return dynamic_formatter<>::format(val, ctx);
+// }, v);
// }
// };
template <typename Char = char> class dynamic_formatter {
@@ -3277,28 +3591,15 @@ FMT_CONSTEXPR void advance_to(
ctx.advance_to(ctx.begin() + (p - &*ctx.begin()));
}
-/** Formats arguments and writes the output to the range. */
-template <typename ArgFormatter, typename Char, typename Context>
-typename Context::iterator vformat_to(
- typename ArgFormatter::iterator out, basic_string_view<Char> format_str,
- basic_format_args<Context> args,
- detail::locale_ref loc = detail::locale_ref()) {
- if (format_str.size() == 2 && detail::equal2(format_str.data(), "{}")) {
- auto arg = args.get(0);
- if (!arg) detail::error_handler().on_error("argument not found");
- using iterator = typename ArgFormatter::iterator;
- return visit_format_arg(
- detail::default_arg_formatter<iterator, Char>{out, args, loc}, arg);
- }
- detail::format_handler<ArgFormatter, Char, Context> h(out, format_str, args,
- loc);
- detail::parse_format_string<false>(format_str, h);
- return h.context.out();
-}
+/**
+ \rst
+ Converts ``p`` to ``const void*`` for pointer formatting.
-// Casts ``p`` to ``const void*`` for pointer formatting.
-// Example:
-// auto s = format("{}", ptr(p));
+ **Example**::
+
+ auto s = fmt::format("{}", fmt::ptr(p));
+ \endrst
+ */
template <typename T> inline const void* ptr(const T* p) { return p; }
template <typename T> inline const void* ptr(const std::unique_ptr<T>& p) {
return p.get();
@@ -3317,6 +3618,10 @@ class bytes {
};
template <> struct formatter<bytes> {
+ private:
+ detail::dynamic_format_specs<char> specs_;
+
+ public:
template <typename ParseContext>
FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
using handler_type = detail::dynamic_specs_handler<ParseContext>;
@@ -3335,9 +3640,6 @@ template <> struct formatter<bytes> {
specs_.precision, specs_.precision_ref, ctx);
return detail::write_bytes(ctx.out(), b.data_, specs_);
}
-
- private:
- detail::dynamic_format_specs<char> specs_;
};
template <typename It, typename Sentinel, typename Char>
@@ -3402,15 +3704,14 @@ arg_join<It, Sentinel, wchar_t> join(It begin, Sentinel end, wstring_view sep) {
\endrst
*/
template <typename Range>
-arg_join<detail::iterator_t<const Range>, detail::sentinel_t<const Range>, char>
-join(const Range& range, string_view sep) {
+arg_join<detail::iterator_t<Range>, detail::sentinel_t<Range>, char> join(
+ Range&& range, string_view sep) {
return join(std::begin(range), std::end(range), sep);
}
template <typename Range>
-arg_join<detail::iterator_t<const Range>, detail::sentinel_t<const Range>,
- wchar_t>
-join(const Range& range, wstring_view sep) {
+arg_join<detail::iterator_t<Range>, detail::sentinel_t<Range>, wchar_t> join(
+ Range&& range, wstring_view sep) {
return join(std::begin(range), std::end(range), sep);
}
@@ -3437,7 +3738,7 @@ inline std::string to_string(T value) {
// The buffer should be large enough to store the number including the sign or
// "false" for bool.
constexpr int max_size = detail::digits10<T>() + 2;
- char buffer[max_size > 5 ? max_size : 5];
+ char buffer[max_size > 5 ? static_cast<unsigned>(max_size) : 5];
char* begin = buffer;
return std::string(begin, detail::write<char>(begin, value));
}
@@ -3457,18 +3758,30 @@ std::basic_string<Char> to_string(const basic_memory_buffer<Char, SIZE>& buf) {
}
template <typename Char>
-typename buffer_context<Char>::iterator detail::vformat_to(
+void detail::vformat_to(
detail::buffer<Char>& buf, basic_string_view<Char> format_str,
- basic_format_args<buffer_context<type_identity_t<Char>>> args) {
- using af = arg_formatter<typename buffer_context<Char>::iterator, Char>;
- return vformat_to<af>(std::back_inserter(buf), to_string_view(format_str),
- args);
+ basic_format_args<buffer_context<type_identity_t<Char>>> args,
+ detail::locale_ref loc) {
+ using iterator = typename buffer_context<Char>::iterator;
+ auto out = buffer_appender<Char>(buf);
+ if (format_str.size() == 2 && equal2(format_str.data(), "{}")) {
+ auto arg = args.get(0);
+ if (!arg) error_handler().on_error("argument not found");
+ visit_format_arg(default_arg_formatter<iterator, Char>{out, args, loc},
+ arg);
+ return;
+ }
+ format_handler<iterator, Char, buffer_context<Char>> h(out, format_str, args,
+ loc);
+ parse_format_string<false>(format_str, h);
}
#ifndef FMT_HEADER_ONLY
-extern template format_context::iterator detail::vformat_to(
- detail::buffer<char>&, string_view, basic_format_args<format_context>);
+extern template void detail::vformat_to(detail::buffer<char>&, string_view,
+ basic_format_args<format_context>,
+ detail::locale_ref);
namespace detail {
+
extern template FMT_API std::string grouping_impl<char>(locale_ref loc);
extern template FMT_API std::string grouping_impl<wchar_t>(locale_ref loc);
extern template FMT_API char thousands_sep_impl<char>(locale_ref loc);
@@ -3494,7 +3807,7 @@ extern template int snprintf_float<long double>(long double value,
template <typename S, typename Char = char_t<S>,
FMT_ENABLE_IF(detail::is_string<S>::value)>
-inline typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to(
+inline void vformat_to(
detail::buffer<Char>& buf, const S& format_str,
basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args) {
return detail::vformat_to(buf, to_string_view(format_str), args);
@@ -3504,10 +3817,9 @@ template <typename S, typename... Args, size_t SIZE = inline_buffer_size,
typename Char = enable_if_t<detail::is_string<S>::value, char_t<S>>>
inline typename buffer_context<Char>::iterator format_to(
basic_memory_buffer<Char, SIZE>& buf, const S& format_str, Args&&... args) {
- detail::check_format_string<Args...>(format_str);
- using context = buffer_context<Char>;
- return detail::vformat_to(buf, to_string_view(format_str),
- make_format_args<context>(args...));
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ detail::vformat_to(buf, to_string_view(format_str), vargs);
+ return detail::buffer_appender<Char>(buf);
}
template <typename OutputIt, typename Char = char>
@@ -3516,88 +3828,17 @@ using format_context_t = basic_format_context<OutputIt, Char>;
template <typename OutputIt, typename Char = char>
using format_args_t = basic_format_args<format_context_t<OutputIt, Char>>;
-template <
- typename S, typename OutputIt, typename... Args,
- FMT_ENABLE_IF(detail::is_output_iterator<OutputIt>::value &&
- !detail::is_contiguous_back_insert_iterator<OutputIt>::value)>
-inline OutputIt vformat_to(
- OutputIt out, const S& format_str,
- format_args_t<type_identity_t<OutputIt>, char_t<S>> args) {
- using af = detail::arg_formatter<OutputIt, char_t<S>>;
- return vformat_to<af>(out, to_string_view(format_str), args);
-}
-
-/**
- \rst
- Formats arguments, writes the result to the output iterator ``out`` and returns
- the iterator past the end of the output range.
-
- **Example**::
-
- std::vector<char> out;
- fmt::format_to(std::back_inserter(out), "{}", 42);
- \endrst
- */
-template <typename OutputIt, typename S, typename... Args,
- FMT_ENABLE_IF(
- detail::is_output_iterator<OutputIt>::value &&
- !detail::is_contiguous_back_insert_iterator<OutputIt>::value &&
- detail::is_string<S>::value)>
-inline OutputIt format_to(OutputIt out, const S& format_str, Args&&... args) {
- detail::check_format_string<Args...>(format_str);
- using context = format_context_t<OutputIt, char_t<S>>;
- return vformat_to(out, to_string_view(format_str),
- make_format_args<context>(args...));
-}
-
-template <typename OutputIt> struct format_to_n_result {
- /** Iterator past the end of the output range. */
- OutputIt out;
- /** Total (not truncated) output size. */
- size_t size;
-};
-
template <typename OutputIt, typename Char = typename OutputIt::value_type>
-using format_to_n_context =
- format_context_t<detail::truncating_iterator<OutputIt>, Char>;
+using format_to_n_context FMT_DEPRECATED_ALIAS = buffer_context<Char>;
template <typename OutputIt, typename Char = typename OutputIt::value_type>
-using format_to_n_args = basic_format_args<format_to_n_context<OutputIt, Char>>;
+using format_to_n_args FMT_DEPRECATED_ALIAS =
+ basic_format_args<buffer_context<Char>>;
template <typename OutputIt, typename Char, typename... Args>
-inline format_arg_store<format_to_n_context<OutputIt, Char>, Args...>
+FMT_DEPRECATED format_arg_store<buffer_context<Char>, Args...>
make_format_to_n_args(const Args&... args) {
- return format_arg_store<format_to_n_context<OutputIt, Char>, Args...>(
- args...);
-}
-
-template <typename OutputIt, typename Char, typename... Args,
- FMT_ENABLE_IF(detail::is_output_iterator<OutputIt>::value)>
-inline format_to_n_result<OutputIt> vformat_to_n(
- OutputIt out, size_t n, basic_string_view<Char> format_str,
- format_to_n_args<type_identity_t<OutputIt>, type_identity_t<Char>> args) {
- auto it = vformat_to(detail::truncating_iterator<OutputIt>(out, n),
- format_str, args);
- return {it.base(), it.count()};
-}
-
-/**
- \rst
- Formats arguments, writes up to ``n`` characters of the result to the output
- iterator ``out`` and returns the total output size and the iterator past the
- end of the output range.
- \endrst
- */
-template <typename OutputIt, typename S, typename... Args,
- FMT_ENABLE_IF(detail::is_string<S>::value&&
- detail::is_output_iterator<OutputIt>::value)>
-inline format_to_n_result<OutputIt> format_to_n(OutputIt out, size_t n,
- const S& format_str,
- const Args&... args) {
- detail::check_format_string<Args...>(format_str);
- using context = format_to_n_context<OutputIt, char_t<S>>;
- return vformat_to_n(out, n, to_string_view(format_str),
- make_format_args<context>(args...));
+ return format_arg_store<buffer_context<Char>, Args...>(args...);
}
template <typename Char, enable_if_t<(!std::is_same<Char, char>::value), int>>
@@ -3609,15 +3850,6 @@ std::basic_string<Char> detail::vformat(
return to_string(buffer);
}
-/**
- Returns the number of characters in the output of
- ``format(format_str, args...)``.
- */
-template <typename... Args>
-inline size_t formatted_size(string_view format_str, const Args&... args) {
- return format_to(detail::counting_iterator(), format_str, args...).count();
-}
-
template <typename Char, FMT_ENABLE_IF(std::is_same<Char, wchar_t>::value)>
void vprint(std::FILE* f, basic_string_view<Char> format_str,
wformat_args args) {
@@ -3642,8 +3874,7 @@ template <typename Char, Char... CHARS> class udl_formatter {
template <typename... Args>
std::basic_string<Char> operator()(Args&&... args) const {
static FMT_CONSTEXPR_DECL Char s[] = {CHARS..., '\0'};
- check_format_string<remove_cvref_t<Args>...>(FMT_STRING(s));
- return format(s, std::forward<Args>(args)...);
+ return format(FMT_STRING(s), std::forward<Args>(args)...);
}
};
# else
diff --git a/src/third_party/format.cpp b/src/third_party/format.cpp
index a64a1f3..6141d96 100644
--- a/src/third_party/format.cpp
+++ b/src/third_party/format.cpp
@@ -23,6 +23,36 @@ int format_float(char* buf, std::size_t size, const char* format, int precision,
return precision < 0 ? snprintf_ptr(buf, size, format, value)
: snprintf_ptr(buf, size, format, precision, value);
}
+
+template FMT_API dragonbox::decimal_fp<float> dragonbox::to_decimal(float x)
+ FMT_NOEXCEPT;
+template FMT_API dragonbox::decimal_fp<double> dragonbox::to_decimal(double x)
+ FMT_NOEXCEPT;
+
+// DEPRECATED! This function exists for ABI compatibility.
+template <typename Char>
+typename basic_format_context<std::back_insert_iterator<buffer<Char>>,
+ Char>::iterator
+vformat_to(buffer<Char>& buf, basic_string_view<Char> format_str,
+ basic_format_args<basic_format_context<
+ std::back_insert_iterator<buffer<type_identity_t<Char>>>,
+ type_identity_t<Char>>>
+ args) {
+ using iterator = std::back_insert_iterator<buffer<char>>;
+ using context = basic_format_context<
+ std::back_insert_iterator<buffer<type_identity_t<Char>>>,
+ type_identity_t<Char>>;
+ auto out = iterator(buf);
+ format_handler<iterator, Char, context> h(out, format_str, args, {});
+ parse_format_string<false>(format_str, h);
+ return out;
+}
+template basic_format_context<std::back_insert_iterator<buffer<char>>,
+ char>::iterator
+vformat_to(buffer<char>&, string_view,
+ basic_format_args<basic_format_context<
+ std::back_insert_iterator<buffer<type_identity_t<char>>>,
+ type_identity_t<char>>>);
} // namespace detail
template struct FMT_INSTANTIATION_DEF_API detail::basic_data<void>;
@@ -44,9 +74,9 @@ template FMT_API char detail::decimal_point_impl(locale_ref);
template FMT_API void detail::buffer<char>::append(const char*, const char*);
-template FMT_API FMT_BUFFER_CONTEXT(char)::iterator detail::vformat_to(
+template FMT_API void detail::vformat_to(
detail::buffer<char>&, string_view,
- basic_format_args<FMT_BUFFER_CONTEXT(char)>);
+ basic_format_args<FMT_BUFFER_CONTEXT(char)>, detail::locale_ref);
template FMT_API int detail::snprintf_float(double, int, detail::float_specs,
detail::buffer<char>&);
diff --git a/src/third_party/nonstd/optional.hpp b/src/third_party/nonstd/optional.hpp
index 33a9b98..8b371e5 100644
--- a/src/third_party/nonstd/optional.hpp
+++ b/src/third_party/nonstd/optional.hpp
@@ -12,7 +12,7 @@
#define NONSTD_OPTIONAL_LITE_HPP
#define optional_lite_MAJOR 3
-#define optional_lite_MINOR 2
+#define optional_lite_MINOR 4
#define optional_lite_PATCH 0
#define optional_lite_VERSION optional_STRINGIFY(optional_lite_MAJOR) "." optional_STRINGIFY(optional_lite_MINOR) "." optional_STRINGIFY(optional_lite_PATCH)
@@ -26,6 +26,20 @@
#define optional_OPTIONAL_NONSTD 1
#define optional_OPTIONAL_STD 2
+// tweak header support:
+
+#ifdef __has_include
+# if __has_include(<nonstd/optional.tweak.hpp>)
+# include <nonstd/optional.tweak.hpp>
+# endif
+#define optional_HAVE_TWEAK_HEADER 1
+#else
+#define optional_HAVE_TWEAK_HEADER 0
+//# pragma message("optional.hpp: Note: Tweak header not supported.")
+#endif
+
+// optional selection and configuration:
+
#if !defined( optional_CONFIG_SELECT_OPTIONAL )
# define optional_CONFIG_SELECT_OPTIONAL ( optional_HAVE_STD_OPTIONAL ? optional_OPTIONAL_STD : optional_OPTIONAL_NONSTD )
#endif
@@ -33,7 +47,10 @@
// Control presence of exception handling (try and auto discover):
#ifndef optional_CONFIG_NO_EXCEPTIONS
-# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
+# if _MSC_VER
+# include <cstddef> // for _HAS_EXCEPTIONS
+# endif
+# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS)
# define optional_CONFIG_NO_EXCEPTIONS 0
# else
# define optional_CONFIG_NO_EXCEPTIONS 1
@@ -227,16 +244,17 @@ namespace nonstd {
// Compiler versions:
//
-// MSVC++ 6.0 _MSC_VER == 1200 (Visual Studio 6.0)
-// MSVC++ 7.0 _MSC_VER == 1300 (Visual Studio .NET 2002)
-// MSVC++ 7.1 _MSC_VER == 1310 (Visual Studio .NET 2003)
-// MSVC++ 8.0 _MSC_VER == 1400 (Visual Studio 2005)
-// MSVC++ 9.0 _MSC_VER == 1500 (Visual Studio 2008)
-// MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010)
-// MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012)
-// MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013)
-// MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015)
-// MSVC++ 14.1 _MSC_VER >= 1910 (Visual Studio 2017)
+// MSVC++ 6.0 _MSC_VER == 1200 optional_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0)
+// MSVC++ 7.0 _MSC_VER == 1300 optional_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002)
+// MSVC++ 7.1 _MSC_VER == 1310 optional_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003)
+// MSVC++ 8.0 _MSC_VER == 1400 optional_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005)
+// MSVC++ 9.0 _MSC_VER == 1500 optional_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008)
+// MSVC++ 10.0 _MSC_VER == 1600 optional_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010)
+// MSVC++ 11.0 _MSC_VER == 1700 optional_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012)
+// MSVC++ 12.0 _MSC_VER == 1800 optional_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013)
+// MSVC++ 14.0 _MSC_VER == 1900 optional_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015)
+// MSVC++ 14.1 _MSC_VER >= 1910 optional_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017)
+// MSVC++ 14.2 _MSC_VER >= 1920 optional_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019)
#if defined(_MSC_VER ) && !defined(__clang__)
# define optional_COMPILER_MSVC_VER (_MSC_VER )
@@ -295,13 +313,26 @@ namespace nonstd {
#define optional_CPP14_000 (optional_CPP14_OR_GREATER)
#define optional_CPP17_000 (optional_CPP17_OR_GREATER)
+// gcc >= 4.9, msvc >= vc14.1 (vs17):
+#define optional_CPP11_140_G490 ((optional_CPP11_OR_GREATER_ && optional_COMPILER_GNUC_VERSION >= 490) || (optional_COMPILER_MSVC_VER >= 1910))
+
+// clang >= 3.5, msvc >= vc11 (vs12):
+#define optional_CPP11_110_C350 ( optional_CPP11_110 && !optional_BETWEEN( optional_COMPILER_CLANG_VERSION, 1, 350 ) )
+
+// clang >= 3.5, gcc >= 5.0, msvc >= vc11 (vs12):
+#define optional_CPP11_110_C350_G500 \
+ ( optional_CPP11_110 && \
+ !( optional_BETWEEN( optional_COMPILER_CLANG_VERSION, 1, 350 ) \
+ || optional_BETWEEN( optional_COMPILER_GNUC_VERSION , 1, 500 ) ) )
+
// Presence of C++11 language features:
#define optional_HAVE_CONSTEXPR_11 optional_CPP11_140
#define optional_HAVE_IS_DEFAULT optional_CPP11_140
#define optional_HAVE_NOEXCEPT optional_CPP11_140
#define optional_HAVE_NULLPTR optional_CPP11_100
-#define optional_HAVE_REF_QUALIFIER optional_CPP11_140
+#define optional_HAVE_REF_QUALIFIER optional_CPP11_140_G490
+#define optional_HAVE_INITIALIZER_LIST optional_CPP11_140
// Presence of C++14 language features:
@@ -320,6 +351,13 @@ namespace nonstd {
#define optional_HAVE_TR1_TYPE_TRAITS (!! optional_COMPILER_GNUC_VERSION )
#define optional_HAVE_TR1_ADD_POINTER (!! optional_COMPILER_GNUC_VERSION )
+#define optional_HAVE_IS_ASSIGNABLE optional_CPP11_110_C350
+#define optional_HAVE_IS_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350
+#define optional_HAVE_IS_NOTHROW_MOVE_ASSIGNABLE optional_CPP11_110_C350
+#define optional_HAVE_IS_NOTHROW_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350
+#define optional_HAVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE optional_CPP11_110_C350_G500
+#define optional_HAVE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350_G500
+
// C++ feature usage:
#if optional_HAVE( CONSTEXPR_11 )
@@ -397,7 +435,7 @@ namespace nonstd {
template< bool B = (__VA_ARGS__), typename std::enable_if<B, int>::type = 0 >
#define optional_REQUIRES_T(...) \
- , typename = typename std::enable_if< (__VA_ARGS__), nonstd::optional_lite::detail::enabler >::type
+ , typename std::enable_if< (__VA_ARGS__), int >::type = 0
#define optional_REQUIRES_R(R, ...) \
typename std::enable_if< (__VA_ARGS__), R>::type
@@ -415,6 +453,12 @@ namespace nonstd { namespace optional_lite {
namespace std11 {
+template< class T, T v > struct integral_constant { enum { value = v }; };
+template< bool B > struct bool_constant : integral_constant<bool, B>{};
+
+typedef bool_constant< true > true_type;
+typedef bool_constant< false > false_type;
+
#if optional_CPP11_OR_GREATER
using std::move;
#else
@@ -428,6 +472,42 @@ namespace std11 {
template< typename T, typename F > struct conditional<false, T, F> { typedef F type; };
#endif // optional_HAVE_CONDITIONAL
+#if optional_HAVE( IS_ASSIGNABLE )
+ using std::is_assignable;
+#else
+ template< class T, class U > struct is_assignable : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_MOVE_CONSTRUCTIBLE )
+ using std::is_move_constructible;
+#else
+ template< class T > struct is_move_constructible : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_NOTHROW_MOVE_ASSIGNABLE )
+ using std::is_nothrow_move_assignable;
+#else
+ template< class T > struct is_nothrow_move_assignable : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_NOTHROW_MOVE_CONSTRUCTIBLE )
+ using std::is_nothrow_move_constructible;
+#else
+ template< class T > struct is_nothrow_move_constructible : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_TRIVIALLY_COPY_CONSTRUCTIBLE )
+ using std::is_trivially_copy_constructible;
+#else
+ template< class T > struct is_trivially_copy_constructible : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_TRIVIALLY_MOVE_CONSTRUCTIBLE )
+ using std::is_trivially_move_constructible;
+#else
+ template< class T > struct is_trivially_move_constructible : std11::true_type{};
+#endif
+
} // namespace std11
#if optional_CPP11_OR_GREATER
@@ -450,10 +530,10 @@ using std::swap;
struct is_swappable
{
template< typename T, typename = decltype( swap( std::declval<T&>(), std::declval<T&>() ) ) >
- static std::true_type test( int /*unused*/ );
+ static std11::true_type test( int /*unused*/ );
template< typename >
- static std::false_type test(...);
+ static std11::false_type test(...);
};
struct is_nothrow_swappable
@@ -467,10 +547,10 @@ struct is_nothrow_swappable
}
template< typename T >
- static auto test( int /*unused*/ ) -> std::integral_constant<bool, satisfies<T>()>{}
+ static auto test( int /*unused*/ ) -> std11::integral_constant<bool, satisfies<T>()>{}
template< typename >
- static auto test(...) -> std::false_type;
+ static auto test(...) -> std11::false_type;
};
} // namespace detail
@@ -508,12 +588,6 @@ class optional;
namespace detail {
-// for optional_REQUIRES_T
-
-#if optional_CPP11_OR_GREATER
-enum class enabler{};
-#endif
-
// C++11 emulation:
struct nulltype{};
@@ -705,6 +779,12 @@ union storage_t
}
template< class... Args >
+ storage_t( nonstd_lite_in_place_t(T), Args&&... args )
+ {
+ emplace( std::forward<Args>(args)... );
+ }
+
+ template< class... Args >
void emplace( Args&&... args )
{
::new( value_ptr() ) value_type( std::forward<Args>(args)... );
@@ -743,7 +823,7 @@ union storage_t
return * value_ptr();
}
-#if optional_CPP11_OR_GREATER
+#if optional_HAVE( REF_QUALIFIER )
optional_nodiscard value_type const && value() const optional_refref_qual
{
@@ -861,13 +941,15 @@ public:
{}
// 2 - copy-construct
- optional_constexpr14 optional( optional const & other
#if optional_CPP11_OR_GREATER
- optional_REQUIRES_A(
- true || std::is_copy_constructible<T>::value
- )
+ // template< typename U = T
+ // optional_REQUIRES_T(
+ // std::is_copy_constructible<U>::value
+ // || std11::is_trivially_copy_constructible<U>::value
+ // )
+ // >
#endif
- )
+ optional_constexpr14 optional( optional const & other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
@@ -879,12 +961,15 @@ public:
#if optional_CPP11_OR_GREATER
// 3 (C++11) - move-construct from optional
- optional_constexpr14 optional( optional && other
- optional_REQUIRES_A(
- true || std::is_move_constructible<T>::value
+ template< typename U = T
+ optional_REQUIRES_T(
+ std11::is_move_constructible<U>::value
+ || std11::is_trivially_move_constructible<U>::value
)
- // NOLINTNEXTLINE( performance-noexcept-move-constructor )
- ) noexcept( std::is_nothrow_move_constructible<T>::value )
+ >
+ optional_constexpr14 optional( optional && other )
+ // NOLINTNEXTLINE( performance-noexcept-move-constructor )
+ noexcept( std11::is_nothrow_move_constructible<T>::value )
: has_value_( other.has_value() )
{
if ( other.has_value() )
@@ -894,9 +979,8 @@ public:
}
// 4a (C++11) - explicit converting copy-construct from optional
- template< typename U >
- explicit optional( optional<U> const & other
- optional_REQUIRES_A(
+ template< typename U
+ optional_REQUIRES_T(
std::is_constructible<T, U const &>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
@@ -908,7 +992,8 @@ public:
&& !std::is_convertible< optional<U> const &&, T>::value
&& !std::is_convertible< U const & , T>::value /*=> explicit */
)
- )
+ >
+ explicit optional( optional<U> const & other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
@@ -919,11 +1004,9 @@ public:
#endif // optional_CPP11_OR_GREATER
// 4b (C++98 and later) - non-explicit converting copy-construct from optional
- template< typename U >
- // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
- optional( optional<U> const & other
+ template< typename U
#if optional_CPP11_OR_GREATER
- optional_REQUIRES_A(
+ optional_REQUIRES_T(
std::is_constructible<T, U const &>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
@@ -936,7 +1019,9 @@ public:
&& std::is_convertible< U const & , T>::value /*=> non-explicit */
)
#endif // optional_CPP11_OR_GREATER
- )
+ >
+ // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
+ /*non-explicit*/ optional( optional<U> const & other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
@@ -948,9 +1033,8 @@ public:
#if optional_CPP11_OR_GREATER
// 5a (C++11) - explicit converting move-construct from optional
- template< typename U >
- explicit optional( optional<U> && other
- optional_REQUIRES_A(
+ template< typename U
+ optional_REQUIRES_T(
std::is_constructible<T, U &&>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
@@ -962,6 +1046,8 @@ public:
&& !std::is_convertible< optional<U> const &&, T>::value
&& !std::is_convertible< U &&, T>::value /*=> explicit */
)
+ >
+ explicit optional( optional<U> && other
)
: has_value_( other.has_value() )
{
@@ -972,10 +1058,8 @@ public:
}
// 5a (C++11) - non-explicit converting move-construct from optional
- template< typename U >
- // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
- optional( optional<U> && other
- optional_REQUIRES_A(
+ template< typename U
+ optional_REQUIRES_T(
std::is_constructible<T, U &&>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
@@ -987,7 +1071,9 @@ public:
&& !std::is_convertible< optional<U> const &&, T>::value
&& std::is_convertible< U &&, T>::value /*=> non-explicit */
)
- )
+ >
+ // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
+ /*non-explicit*/ optional( optional<U> && other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
@@ -1019,32 +1105,32 @@ public:
{}
// 8a (C++11) - explicit move construct from value
- template< typename U = value_type >
- optional_constexpr explicit optional( U && value
- optional_REQUIRES_A(
+ template< typename U = T
+ optional_REQUIRES_T(
std::is_constructible<T, U&&>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value
&& !std::is_convertible<U&&, T>::value /*=> explicit */
)
- )
+ >
+ optional_constexpr explicit optional( U && value )
: has_value_( true )
- , contained( T{ std::forward<U>( value ) } )
+ , contained( nonstd_lite_in_place(T), std::forward<U>( value ) )
{}
// 8b (C++11) - non-explicit move construct from value
- template< typename U = value_type >
- // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
- optional_constexpr optional( U && value
- optional_REQUIRES_A(
+ template< typename U = T
+ optional_REQUIRES_T(
std::is_constructible<T, U&&>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value
&& std::is_convertible<U&&, T>::value /*=> non-explicit */
)
- )
+ >
+ // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
+ optional_constexpr /*non-explicit*/ optional( U && value )
: has_value_( true )
- , contained( std::forward<U>( value ) )
+ , contained( nonstd_lite_in_place(T), std::forward<U>( value ) )
{}
#else // optional_CPP11_OR_GREATER
@@ -1087,8 +1173,8 @@ public:
)
operator=( optional const & other )
noexcept(
- std::is_nothrow_move_assignable<T>::value
- && std::is_nothrow_move_constructible<T>::value
+ std11::is_nothrow_move_assignable<T>::value
+ && std11::is_nothrow_move_constructible<T>::value
)
#else
optional & operator=( optional const & other )
@@ -1107,7 +1193,7 @@ public:
optional_REQUIRES_R(
optional &,
true
-// std::is_move_constructible<T>::value
+// std11::is_move_constructible<T>::value
// && std::is_move_assignable<T>::value
)
operator=( optional && other ) noexcept
@@ -1124,7 +1210,7 @@ public:
optional_REQUIRES_R(
optional &,
std::is_constructible<T , U>::value
- && std::is_assignable<T&, U>::value
+ && std11::is_assignable<T&, U>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value
&& !(std::is_scalar<T>::value && std::is_same<T, typename std::decay<U>::type>::value)
@@ -1162,7 +1248,7 @@ public:
optional_REQUIRES_R(
optional&,
std::is_constructible< T , U const &>::value
- && std::is_assignable< T&, U const &>::value
+ && std11::is_assignable< T&, U const &>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_constructible<T, optional<U> const & >::value
@@ -1171,10 +1257,10 @@ public:
&& !std::is_convertible< optional<U> && , T>::value
&& !std::is_convertible< optional<U> const & , T>::value
&& !std::is_convertible< optional<U> const &&, T>::value
- && !std::is_assignable< T&, optional<U> & >::value
- && !std::is_assignable< T&, optional<U> && >::value
- && !std::is_assignable< T&, optional<U> const & >::value
- && !std::is_assignable< T&, optional<U> const && >::value
+ && !std11::is_assignable< T&, optional<U> & >::value
+ && !std11::is_assignable< T&, optional<U> && >::value
+ && !std11::is_assignable< T&, optional<U> const & >::value
+ && !std11::is_assignable< T&, optional<U> const && >::value
)
#else
optional&
@@ -1192,7 +1278,7 @@ public:
optional_REQUIRES_R(
optional&,
std::is_constructible< T , U>::value
- && std::is_assignable< T&, U>::value
+ && std11::is_assignable< T&, U>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_constructible<T, optional<U> const & >::value
@@ -1201,10 +1287,10 @@ public:
&& !std::is_convertible< optional<U> && , T>::value
&& !std::is_convertible< optional<U> const & , T>::value
&& !std::is_convertible< optional<U> const &&, T>::value
- && !std::is_assignable< T&, optional<U> & >::value
- && !std::is_assignable< T&, optional<U> && >::value
- && !std::is_assignable< T&, optional<U> const & >::value
- && !std::is_assignable< T&, optional<U> const && >::value
+ && !std11::is_assignable< T&, optional<U> & >::value
+ && !std11::is_assignable< T&, optional<U> && >::value
+ && !std11::is_assignable< T&, optional<U> const & >::value
+ && !std11::is_assignable< T&, optional<U> const && >::value
)
operator=( optional<U> && other )
{
@@ -1246,7 +1332,7 @@ public:
void swap( optional & other )
#if optional_CPP11_OR_GREATER
noexcept(
- std::is_nothrow_move_constructible<T>::value
+ std11::is_nothrow_move_constructible<T>::value
&& std17::is_nothrow_swappable<T>::value
)
#endif
@@ -1283,7 +1369,7 @@ public:
contained.value();
}
-#if optional_HAVE( REF_QUALIFIER ) && ( !optional_COMPILER_GNUC_VERSION || optional_COMPILER_GNUC_VERSION >= 490 )
+#if optional_HAVE( REF_QUALIFIER )
optional_constexpr value_type const && operator *() const optional_refref_qual
{
@@ -1612,7 +1698,7 @@ inline optional_constexpr bool operator>=( U const & v, optional<T> const & x )
template< typename T
#if optional_CPP11_OR_GREATER
optional_REQUIRES_T(
- std::is_move_constructible<T>::value
+ std11::is_move_constructible<T>::value
&& std17::is_swappable<T>::value )
#endif
>
@@ -1659,7 +1745,10 @@ optional<T> make_optional( T const & value )
using optional_lite::optional;
using optional_lite::nullopt_t;
using optional_lite::nullopt;
+
+#if ! optional_CONFIG_NO_EXCEPTIONS
using optional_lite::bad_optional_access;
+#endif
using optional_lite::make_optional;
diff --git a/src/third_party/win32/mktemp.c b/src/third_party/win32/mktemp.c
new file mode 100644
index 0000000..8963b89
--- /dev/null
+++ b/src/third_party/win32/mktemp.c
@@ -0,0 +1,260 @@
+/* $OpenBSD: mktemp.c,v 1.39 2017/11/28 06:55:49 tb Exp $ */
+/*
+ * Copyright (c) 1996-1998, 2008 Theo de Raadt
+ * Copyright (c) 1997, 2008-2009 Todd C. Miller
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef _WIN32
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600 // _WIN32_WINNT_VISTA
+#endif
+
+#ifndef _CRT_NONSTDC_NO_DEPRECATE
+#define _CRT_NONSTDC_NO_DEPRECATE
+#endif
+
+#ifndef _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#include <direct.h>
+#include <io.h>
+
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX 1
+#define WIN32_NO_STATUS
+#include <windows.h>
+#undef WIN32_NO_STATUS
+#include <ntstatus.h>
+
+// Work-around wrong calling convention for RtlGenRandom in old mingw-w64
+#define SystemFunction036 __stdcall SystemFunction036
+#include <ntsecapi.h>
+#undef SystemFunction036
+#endif
+
+#ifdef _MSC_VER
+#define S_IRUSR (_S_IREAD)
+#define S_IWUSR (_S_IWRITE)
+#endif
+
+#define MKTEMP_NAME 0
+#define MKTEMP_FILE 1
+#define MKTEMP_DIR 2
+
+#define TEMPCHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+#define NUM_CHARS (sizeof(TEMPCHARS) - 1)
+#define MIN_X 6
+
+#ifdef _WIN32
+#define MKOTEMP_FLAGS (_O_APPEND|_O_NOINHERIT|_O_BINARY|_O_TEXT| \
+ _O_U16TEXT|_O_U8TEXT|_O_WTEXT)
+#define MKTEMP_FLAGS_DEFAULT (_O_BINARY)
+#else
+#define MKOTEMP_FLAGS (O_APPEND|O_CLOEXEC|O_DSYNC|O_RSYNC|O_SYNC)
+#define MKTEMP_FLAGS_DEFAULT (0)
+#endif
+
+#ifndef nitems
+#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
+#endif
+
+#ifdef _WIN32
+static BOOL CALLBACK
+lookup_ntdll_function_once(
+ PINIT_ONCE init_once, PVOID parameter, PVOID *context)
+{
+ (void)init_once;
+ *context = (PVOID)GetProcAddress(
+ GetModuleHandleA("ntdll.dll"), parameter);
+ return(TRUE);
+}
+
+static NTSTATUS
+GetLastNtStatus()
+{
+ static INIT_ONCE init_once = INIT_ONCE_STATIC_INIT;
+ typedef NTSTATUS(NTAPI * RtlGetLastNtStatus_t)(void);
+ RtlGetLastNtStatus_t get_last_nt_status = NULL;
+ InitOnceExecuteOnce(&init_once, lookup_ntdll_function_once,
+ "RtlGetLastNtStatus", (LPVOID *)&get_last_nt_status);
+ return(get_last_nt_status());
+}
+
+static int
+normalize_msvcrt_errno(int ret)
+{
+ if (ret == -1 && errno == EACCES && _doserrno == ERROR_ACCESS_DENIED) {
+ /*
+ * Win32 APIs return ERROR_ACCESS_DENIED for many distinct
+ * NTSTATUS codes, even when it's arguably inappropriate to do
+ * so, e.g. if you attempt to open a directory, or open a file
+ * that's in the "pending delete" state. These are mapped to
+ * EACCESS in the C runtime. We instead map these to EEXIST.
+ */
+ NTSTATUS nt_err = GetLastNtStatus();
+ if (nt_err == STATUS_FILE_IS_A_DIRECTORY ||
+ nt_err == STATUS_DELETE_PENDING) {
+ errno = EEXIST;
+ }
+ }
+ return(ret);
+}
+
+#define open(...) (normalize_msvcrt_errno(open(__VA_ARGS__)))
+#define mkdir(path, mode) (normalize_msvcrt_errno(mkdir(path)))
+#define lstat(path, sb) (normalize_msvcrt_errno(stat(path, sb)))
+
+static void (*_bsd_mkstemp_random_source)(void *buf, size_t n);
+
+void
+bsd_mkstemp_set_random_source(void (*f)(void *buf, size_t n))
+{
+ _bsd_mkstemp_random_source = f;
+}
+
+static void
+arc4random_buf(void *buf, size_t nbytes)
+{
+ if (_bsd_mkstemp_random_source != NULL) {
+ _bsd_mkstemp_random_source(buf, nbytes);
+ } else {
+ RtlGenRandom(buf, (ULONG)nbytes);
+ }
+}
+#endif
+
+static int
+mktemp_internal(char *path, int slen, int mode, int flags)
+{
+ char *start, *cp, *ep;
+ const char tempchars[] = TEMPCHARS;
+ unsigned int tries;
+ struct stat sb;
+ size_t len;
+ int fd;
+
+ len = strlen(path);
+ if (len < MIN_X || slen < 0 || (size_t)slen > len - MIN_X) {
+ errno = EINVAL;
+ return(-1);
+ }
+ ep = path + len - slen;
+
+ for (start = ep; start > path && start[-1] == 'X'; start--)
+ ;
+ if (ep - start < MIN_X) {
+ errno = EINVAL;
+ return(-1);
+ }
+
+ if (flags & ~MKOTEMP_FLAGS) {
+ errno = EINVAL;
+ return(-1);
+ }
+ flags |= O_CREAT|O_EXCL|O_RDWR;
+
+ tries = INT_MAX;
+ do {
+ cp = start;
+ do {
+ unsigned short rbuf[16];
+ unsigned int i;
+
+ /*
+ * Avoid lots of arc4random() calls by using
+ * a buffer sized for up to 16 Xs at a time.
+ */
+ arc4random_buf(rbuf, sizeof(rbuf));
+ for (i = 0; i < nitems(rbuf) && cp != ep; i++)
+ *cp++ = tempchars[rbuf[i] % NUM_CHARS];
+ } while (cp != ep);
+
+ switch (mode) {
+ case MKTEMP_NAME:
+ if (lstat(path, &sb) != 0)
+ return(errno == ENOENT ? 0 : -1);
+ break;
+ case MKTEMP_FILE:
+ fd = open(path, flags, S_IRUSR|S_IWUSR);
+ if (fd != -1 || errno != EEXIST)
+ return(fd);
+ break;
+ case MKTEMP_DIR:
+ if (mkdir(path, S_IRUSR|S_IWUSR|S_IXUSR) == 0)
+ return(0);
+ if (errno != EEXIST)
+ return(-1);
+ break;
+ }
+ } while (--tries);
+
+ errno = EEXIST;
+ return(-1);
+}
+
+char *
+bsd_mktemp(char *path)
+{
+ if (mktemp_internal(path, 0, MKTEMP_NAME, MKTEMP_FLAGS_DEFAULT) == -1)
+ return(NULL);
+ return(path);
+}
+
+int
+bsd_mkostemps(char *path, int slen, int flags)
+{
+ return(mktemp_internal(path, slen, MKTEMP_FILE, flags));
+}
+
+int
+bsd_mkstemp(char *path)
+{
+ return(mktemp_internal(path, 0, MKTEMP_FILE, MKTEMP_FLAGS_DEFAULT));
+}
+
+int
+bsd_mkostemp(char *path, int flags)
+{
+ return(mktemp_internal(path, 0, MKTEMP_FILE, flags));
+}
+
+int
+bsd_mkstemps(char *path, int slen)
+{
+ return(mktemp_internal(path, slen, MKTEMP_FILE, MKTEMP_FLAGS_DEFAULT));
+}
+
+char *
+bsd_mkdtemp(char *path)
+{
+ int error;
+
+ error = mktemp_internal(path, 0, MKTEMP_DIR, 0);
+ return(error ? NULL : path);
+}
diff --git a/src/third_party/win32/mktemp.h b/src/third_party/win32/mktemp.h
new file mode 100644
index 0000000..40e0c16
--- /dev/null
+++ b/src/third_party/win32/mktemp.h
@@ -0,0 +1,18 @@
+#ifndef CCACHE_THIRD_PARTY_WIN32_MKTEMP_H_
+#define CCACHE_THIRD_PARTY_WIN32_MKTEMP_H_
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int bsd_mkstemp(char *);
+
+// Exposed for testing.
+void bsd_mkstemp_set_random_source(void (*)(void *buf, size_t n));
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8e372b7..c33befd 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -34,36 +34,37 @@ set_property(
${clean_files_prop_name} "${CMAKE_BINARY_DIR}/testdir")
addtest(base)
-addtest(nocpp2)
-addtest(cpp1)
-addtest(multi_arch)
-addtest(serialize_diagnostics)
+addtest(basedir)
+addtest(cache_levels)
+addtest(cleanup)
addtest(color_diagnostics)
-addtest(sanitize_blacklist)
+addtest(cpp1)
addtest(debug_prefix_map)
-addtest(profiling)
-addtest(profiling_gcc)
-addtest(profiling_clang)
-addtest(profiling_hip_clang)
-addtest(split_dwarf)
-addtest(masquerading)
-addtest(hardlink)
-addtest(fileclone)
+addtest(depend)
addtest(direct)
addtest(direct_gcc)
-addtest(depend)
-addtest(basedir)
-addtest(no_compression)
-addtest(readonly)
-addtest(readonly_direct)
-addtest(cache_levels)
-addtest(cleanup)
-addtest(pch)
-addtest(modules)
-addtest(upgrade)
+addtest(fileclone)
+addtest(hardlink)
+addtest(inode_cache)
addtest(input_charset)
+addtest(masquerading)
+addtest(modules)
+addtest(multi_arch)
+addtest(no_compression)
+addtest(nocpp2)
addtest(nvcc)
addtest(nvcc_direct)
addtest(nvcc_ldir)
addtest(nvcc_nocpp2)
-addtest(inode_cache)
+addtest(pch)
+addtest(profiling)
+addtest(profiling_clang)
+addtest(profiling_gcc)
+addtest(profiling_hip_clang)
+addtest(readonly)
+addtest(readonly_direct)
+addtest(sanitize_blacklist)
+addtest(serialize_diagnostics)
+addtest(source_date_epoch)
+addtest(split_dwarf)
+addtest(upgrade)
diff --git a/test/run b/test/run
index 9623e49..2244943 100755
--- a/test/run
+++ b/test/run
@@ -32,15 +32,15 @@ if [[ -t 1 ]]; then
fi
green() {
- printf "$ansi_boldgreen$*$ansi_reset\n"
+ printf "$ansi_boldgreen%s$ansi_reset\n" "$*"
}
red() {
- printf "$ansi_boldred$*$ansi_reset\n"
+ printf "$ansi_boldred%s$ansi_reset\n" "$*"
}
bold() {
- printf "$ansi_bold$*$ansi_reset\n"
+ printf "$ansi_bold%s$ansi_reset\n" "$*"
}
test_failed() {
@@ -81,9 +81,10 @@ find_compiler() {
generate_code() {
local nlines=$1
local outfile=$2
+ local i
rm -f $outfile
- for i in $(seq $nlines); do
+ for ((i = 1; i <= nlines; i++)); do
echo "int foo_$i(int x) { return x; }" >>$outfile
done
}
@@ -116,7 +117,7 @@ backdate() {
else
m=0
fi
- touch -t 1999010100$(printf "%02u" $m) "$@"
+ touch -t $((199901010000 + m)) "$@"
}
file_size() {
@@ -148,7 +149,18 @@ objdump_grep_cmd() {
expect_stat() {
local stat="$1"
local expected_value="$2"
- local value="$(echo $($CCACHE -s | fgrep "$stat" | cut -c33-))"
+ local line
+ local value=""
+
+ while IFS= read -r line; do
+ if [[ $line = *"$stat"* ]]; then
+ value="${line:32}"
+ # remove leading & trailing whitespace
+ value="${value#${value%%[![:space:]]*}}"
+ value="${value%${value##*[![:space:]]}}"
+ break
+ fi
+ done < <($CCACHE -s)
if [ "$expected_value" != "$value" ]; then
test_failed "Expected \"$stat\" to be $expected_value, actual $value"
@@ -214,6 +226,22 @@ is_equal_object_files() {
elfdump -a -w "$2".dump "$2"
# these were the elfdump fields that seemed to differ (empirically)
diff -I e_shoff -I sh_size -I st_name "$1".dump "$2".dump > /dev/null
+ elif $HOST_OS_WINDOWS && command -v dumpbin.exe >/dev/null; then
+ # Filter out fields that are affected by compilation time or source
+ # filename.
+ local awk_filter='
+ skip {--skip; next}
+
+ /Dump of file/ {next} # dumbin header
+ /time date stamp/ {next} # incremental linker timestamp
+ /number of symbols/ {next} # symbol count
+ /Filename *\| \.file$/ {skip=1; next} # .file symbol
+
+ {print}
+ '
+ dumpbin.exe -all -nologo "$1" | awk "$awk_filter" > "$1".dump
+ dumpbin.exe -all -nologo "$2" | awk "$awk_filter" > "$2".dump
+ cmp -s "$1".dump "$2".dump
else
cmp -s "$1" "$2"
fi
@@ -309,11 +337,12 @@ expect_perm() {
}
reset_environment() {
- while read name; do
- unset $name
- done <<EOF
-$(env | sed -n 's/^\(CCACHE_[A-Z0-9_]*\)=.*$/\1/p')
-EOF
+ while IFS= read -r name; do
+ if [[ $name =~ ^CCACHE_[A-Z0-9_]*$ ]]; then
+ unset $name
+ fi
+ done < <(compgen -e)
+
unset GCC_COLORS
unset TERM
unset XDG_CACHE_HOME
@@ -438,7 +467,7 @@ case $compiler_version in
;;
*clang*)
COMPILER_TYPE_CLANG=true
- CLANG_VERSION_SUFFIX=$(echo $COMPILER | sed 's/.*clang//')
+ CLANG_VERSION_SUFFIX=$(echo "${COMPILER%% *}" | sed 's/.*clang//')
;;
*)
echo "WARNING: Compiler $COMPILER not supported (version: $compiler_version) -- not running tests" >&2
@@ -479,12 +508,18 @@ else
PATH_DELIM=":"
fi
+if [[ $OSTYPE = msys* ]]; then
+ # Native symlink support for Windows.
+ export MSYS="${MSYS:-} winsymlinks:nativestrict"
+fi
+
if $HOST_OS_APPLE; then
SDKROOT=$(xcrun --sdk macosx --show-sdk-path 2>/dev/null)
if [ "$SDKROOT" = "" ]; then
echo "Error: xcrun --show-sdk-path failure"
exit 1
fi
+ export SDKROOT
SYSROOT="-isysroot `echo \"$SDKROOT\" | sed 's/ /\\ /g'`"
else
@@ -493,7 +528,7 @@ fi
# ---------------------------------------
-all_suites="$(sed -rn 's/^addtest\((.*)\)$/\1/p' $(dirname $0)/CMakeLists.txt)"
+all_suites="$(sed -En 's/^addtest\((.*)\)$/\1/p' $(dirname $0)/CMakeLists.txt)"
for suite in $all_suites; do
. $(dirname $0)/suites/$suite.bash
diff --git a/test/suites/base.bash b/test/suites/base.bash
index 75e2768..e16741c 100644
--- a/test/suites/base.bash
+++ b/test/suites/base.bash
@@ -46,7 +46,8 @@ base_tests() {
# The exact output is not tested, but at least it's something human readable
# and not random memory.
- if [ $($CCACHE --version | grep -c '^ccache version [a-zA-Z0-9_./+-]*$') -ne 1 ]; then
+ local version_pattern=$'^ccache version [a-zA-Z0-9_./+-]*\r?$'
+ if [ $($CCACHE --version | grep -E -c "$version_pattern") -ne 1 ]; then
test_failed "Unexpected output of --version"
fi
@@ -212,6 +213,7 @@ base_tests() {
rm -rf src
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS; then
TEST "Source file ending with dot"
mkdir src
@@ -230,6 +232,7 @@ base_tests() {
rm foo.o
rm -rf src
+fi
# -------------------------------------------------------------------------
TEST "Multiple file extensions"
@@ -760,19 +763,23 @@ b"
expect_stat 'files in cache' 1
expect_equal_object_files reference_test1.o test1.o
- CCACHE_COMPILER=$COMPILER $CCACHE non_existing_compiler_will_be_overridden_anyway -c test1.c
+ CCACHE_COMPILER=$COMPILER_BIN $CCACHE \
+ non_existing_compiler_will_be_overridden_anyway \
+ $COMPILER_ARGS -c test1.c
expect_stat 'cache hit (preprocessed)' 1
expect_stat 'cache miss' 1
expect_stat 'files in cache' 1
expect_equal_object_files reference_test1.o test1.o
- CCACHE_COMPILER=$COMPILER $CCACHE same/for/relative -c test1.c
+ CCACHE_COMPILER=$COMPILER_BIN $CCACHE same/for/relative \
+ $COMPILER_ARGS -c test1.c
expect_stat 'cache hit (preprocessed)' 2
expect_stat 'cache miss' 1
expect_stat 'files in cache' 1
expect_equal_object_files reference_test1.o test1.o
- CCACHE_COMPILER=$COMPILER $CCACHE /and/even/absolute/compilers -c test1.c
+ CCACHE_COMPILER=$COMPILER_BIN $CCACHE /and/even/absolute/compilers \
+ $COMPILER_ARGS -c test1.c
expect_stat 'cache hit (preprocessed)' 3
expect_stat 'cache miss' 1
expect_stat 'files in cache' 1
@@ -788,7 +795,7 @@ EOF
chmod +x gcc
CCACHE_DEBUG=1 $CCACHE ./gcc -c test1.c
- compiler_type=$(sed -rn 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
+ compiler_type=$(sed -En 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
if [ "$compiler_type" != gcc ]; then
test_failed "Compiler type $compiler_type != gcc"
fi
@@ -796,7 +803,7 @@ EOF
rm test1.o.ccache-log
CCACHE_COMPILERTYPE=clang CCACHE_DEBUG=1 $CCACHE ./gcc -c test1.c
- compiler_type=$(sed -rn 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
+ compiler_type=$(sed -En 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
if [ "$compiler_type" != clang ]; then
test_failed "Compiler type $compiler_type != clang"
fi
@@ -970,6 +977,7 @@ EOF
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS; then
TEST "CCACHE_UMASK"
saved_umask=$(umask)
@@ -1028,6 +1036,7 @@ EOF
expect_perm "$stats_file" -rw-rw-r--
umask $saved_umask
+fi
# -------------------------------------------------------------------------
TEST "No object file due to bad prefix"
@@ -1086,6 +1095,17 @@ EOF
expect_stat 'compiler produced empty output' 1
# -------------------------------------------------------------------------
+ TEST "Output to /dev/null"
+
+ $CCACHE_COMPILE -c test1.c
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ $CCACHE_COMPILE -c test1.c -o /dev/null
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
TEST "Caching stderr"
cat <<EOF >stderr.c
@@ -1336,6 +1356,7 @@ EOF
fi
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS; then
TEST "UNCACHED_ERR_FD"
cat >compiler.sh <<'EOF'
@@ -1366,6 +1387,7 @@ EOF
if [ "$stderr" != "2Pu1Cc" ]; then
test_failed "Unexpected stderr: $stderr != 2Pu1Cc"
fi
+fi
# -------------------------------------------------------------------------
TEST "Invalid boolean environment configuration options"
diff --git a/test/suites/cache_levels.bash b/test/suites/cache_levels.bash
index 776508b..ef2e8d5 100644
--- a/test/suites/cache_levels.bash
+++ b/test/suites/cache_levels.bash
@@ -11,7 +11,7 @@ expect_on_level() {
local expected_level="$2"
slashes=$(find $CCACHE_DIR -name "*$type" \
- | sed -r -e 's!.*\.ccache/!!' -e 's![^/]*$!!' -e 's![^/]!!g')
+ | sed -E -e 's!.*\.ccache/!!' -e 's![^/]*$!!' -e 's![^/]!!g')
actual_level=$(echo -n "$slashes" | wc -c)
if [ "$actual_level" -ne "$expected_level" ]; then
test_failed "$type file on level $actual_level, expected level $expected_level"
diff --git a/test/suites/cleanup.bash b/test/suites/cleanup.bash
index 33cf02c..b2c53a5 100644
--- a/test/suites/cleanup.bash
+++ b/test/suites/cleanup.bash
@@ -1,10 +1,11 @@
prepare_cleanup_test_dir() {
local dir=$1
+ local i
rm -rf $dir
mkdir -p $dir
- for i in $(seq 0 9); do
- printf '%4017s' '' | tr ' ' 'A' >$dir/result${i}R
+ for ((i = 0; i < 10; ++i)); do
+ printf 'A%.0s' {1..4017} >$dir/result${i}R
backdate $((3 * i + 1)) $dir/result${i}R
done
# NUMFILES: 10, TOTALSIZE: 13 KiB, MAXFILES: 0, MAXSIZE: 0
diff --git a/test/suites/color_diagnostics.bash b/test/suites/color_diagnostics.bash
index 64c7d4f..4ec99f4 100644
--- a/test/suites/color_diagnostics.bash
+++ b/test/suites/color_diagnostics.bash
@@ -113,17 +113,32 @@ color_diagnostics_test() {
expect_stat 'cache miss' 1
expect_stat 'cache hit (preprocessed)' 1
- # -------------------------------------------------------------------------
if $COMPILER_TYPE_GCC; then
+ # ---------------------------------------------------------------------
TEST "-fcolor-diagnostics not accepted for GCC"
generate_code 1 test.c
+
+ if $CCACHE_COMPILE -fcolor-diagnostics -c test.c >&/dev/null; then
+ test_failed "-fcolor-diagnostics unexpectedly accepted by GCC"
+ fi
+
+ # ---------------------------------------------------------------------
+ TEST "-fcolor-diagnostics not accepted for GCC for cached result"
+
+ generate_code 1 test.c
+
+ if ! $CCACHE_COMPILE -c test.c >&/dev/null; then
+ test_failed "unknown error compiling"
+ fi
+
if $CCACHE_COMPILE -fcolor-diagnostics -c test.c >&/dev/null; then
test_failed "-fcolor-diagnostics unexpectedly accepted by GCC"
fi
fi
while read -r case; do
+ # ---------------------------------------------------------------------
TEST "Cache object shared across ${case} (run_second_cpp=$run_second_cpp)"
color_diagnostics_generate_code test1.c
diff --git a/test/suites/inode_cache.bash b/test/suites/inode_cache.bash
index dc8d5f0..ef9c924 100644
--- a/test/suites/inode_cache.bash
+++ b/test/suites/inode_cache.bash
@@ -1,4 +1,9 @@
SUITE_inode_cache_PROBE() {
+ if $HOST_OS_WINDOWS; then
+ echo "inode cache not available on Windows"
+ return
+ fi
+
temp_dir=$(dirname $($CCACHE -k temporary_dir))
fs=$(stat -fLc %T $temp_dir)
if [ "$fs" = "nfs" ]; then
diff --git a/test/suites/nvcc.bash b/test/suites/nvcc.bash
index d73623b..386015d 100644
--- a/test/suites/nvcc.bash
+++ b/test/suites/nvcc.bash
@@ -127,10 +127,10 @@ nvcc_tests() {
expect_stat 'files in cache' 3
$cuobjdump test_cuda.o > test1.dump
expect_equal_content reference_test3.dump test1.dump
-
+
# -------------------------------------------------------------------------
TEST "Option -dc"
-
+
$REAL_NVCC $nvcc_opts_cuda -dc -o reference_test4.o test_cuda.cu
$cuobjdump reference_test4.o > reference_test4.dump
diff --git a/test/suites/pch.bash b/test/suites/pch.bash
index 5729575..97a5c5e 100644
--- a/test/suites/pch.bash
+++ b/test/suites/pch.bash
@@ -609,6 +609,29 @@ pch_suite_gcc() {
expect_stat 'cache hit (direct)' 2
expect_stat 'cache hit (preprocessed)' 0
expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
+ TEST "Too new PCH file"
+
+ # If the precompiled header is too new we shouldn't cache the result at all
+ # since:
+ #
+ # - the precompiled header content must be included in the hash, but
+ # - we don't trust the precompiled header content so we can't hash it
+ # ourselves, and
+ # - the preprocessed output doesn't contain the preprocessed header content.
+
+ touch lib.h
+ touch main.c
+
+ $REAL_COMPILER $SYSROOT -c lib.h
+ touch -d "@$(($(date +%s) + 60))" lib.h.gch # 1 minute in the future
+
+ CCACHE_SLOPPINESS="$DEFAULT_SLOPPINESS pch_defines,time_macros" $CCACHE_COMPILE -include lib.h -c main.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 0
+ expect_stat "can't use precompiled header" 1
}
pch_suite_clang() {
diff --git a/test/suites/source_date_epoch.bash b/test/suites/source_date_epoch.bash
new file mode 100644
index 0000000..d9d11ba
--- /dev/null
+++ b/test/suites/source_date_epoch.bash
@@ -0,0 +1,99 @@
+SUITE_source_date_epoch_PROBE() {
+ echo 'char x[] = __DATE__;' >test.c
+ if ! SOURCE_DATE_EPOCH=0 $REAL_COMPILER -E test.c | grep -q 1970; then
+ echo "SOURCE_DATE_EPOCH not supported by compiler"
+ fi
+}
+
+SUITE_source_date_epoch_SETUP() {
+ echo 'char x;' >without_temporal_macros.c
+ echo 'char x[] = __DATE__;' >with_date_macro.c
+ echo 'char x[] = __TIME__;' >with_time_macro.c
+}
+
+SUITE_source_date_epoch() {
+ # -------------------------------------------------------------------------
+ TEST "Without temporal macro"
+
+ unset CCACHE_NODIRECT
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c without_temporal_macros.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c without_temporal_macros.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c without_temporal_macros.c
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
+ TEST "With __DATE__ macro"
+
+ unset CCACHE_NODIRECT
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_date_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_date_macro.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_date_macro.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
+ TEST "With __TIME__ macro"
+
+ unset CCACHE_NODIRECT
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 2
+
+ # -------------------------------------------------------------------------
+ TEST "With __TIME__ and time_macros sloppiness"
+
+ unset CCACHE_NODIRECT
+
+ CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+}
diff --git a/test/suites/split_dwarf.bash b/test/suites/split_dwarf.bash
index d8c3805..28a3293 100644
--- a/test/suites/split_dwarf.bash
+++ b/test/suites/split_dwarf.bash
@@ -142,4 +142,38 @@ SUITE_split_dwarf() {
elif [ ! -f reference.dwo ] && [ -f test.dwo ]; then
test_failed ".dwo not missing"
fi
+
+ # -------------------------------------------------------------------------
+ TEST "Object file without dot"
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.dwo
+
+ rm test.dwo
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.dwo
+
+ # -------------------------------------------------------------------------
+ TEST "Object file with two dots"
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test.x.y
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.x.dwo
+
+ rm test.x.dwo
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test.x.y
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.x.dwo
}
diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index c82a226..48cf058 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -27,7 +27,7 @@ if(INODE_CACHE_SUPPORTED)
endif()
if(WIN32)
- list(APPEND source_files test_Win32Util.cpp)
+ list(APPEND source_files test_bsdmkstemp.cpp test_Win32Util.cpp)
endif()
add_executable(unittest ${source_files})
@@ -36,6 +36,6 @@ target_link_libraries(
unittest
PRIVATE standard_settings standard_warnings ccache_lib third_party_lib)
-target_include_directories(unittest PRIVATE ${CMAKE_BINARY_DIR} . ../src)
+target_include_directories(unittest PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${ccache_SOURCE_DIR}/src)
add_test(NAME unittest COMMAND unittest)
diff --git a/unittest/test_Config.cpp b/unittest/test_Config.cpp
index 3661c69..fad4ff4 100644
--- a/unittest/test_Config.cpp
+++ b/unittest/test_Config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2011-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2011-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -17,8 +17,8 @@
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../src/Config.hpp"
+#include "../src/Sloppiness.hpp"
#include "../src/Util.hpp"
-#include "../src/ccache.hpp"
#include "../src/exceptions.hpp"
#include "../src/fmtmacros.hpp"
#include "TestUtil.hpp"
@@ -48,6 +48,7 @@ TEST_CASE("Config: default values")
CHECK(config.compression_level() == 0);
CHECK(config.cpp_extension().empty());
CHECK(!config.debug());
+ CHECK(config.debug_dir().empty());
CHECK(!config.depend_mode());
CHECK(config.direct_mode());
CHECK(!config.disable());
@@ -375,6 +376,7 @@ TEST_CASE("Config::visit_items")
"compression_level = 8\n"
"cpp_extension = ce\n"
"debug = false\n"
+ "debug_dir = /dd\n"
"depend_mode = true\n"
"direct_mode = false\n"
"disable = true\n"
@@ -431,6 +433,7 @@ TEST_CASE("Config::visit_items")
"(test.conf) compression_level = 8",
"(test.conf) cpp_extension = ce",
"(test.conf) debug = false",
+ "(test.conf) debug_dir = /dd",
"(test.conf) depend_mode = true",
"(test.conf) direct_mode = false",
"(test.conf) disable = true",
diff --git a/unittest/test_Counters.cpp b/unittest/test_Counters.cpp
index d2382af..b4d3be3 100644
--- a/unittest/test_Counters.cpp
+++ b/unittest/test_Counters.cpp
@@ -17,7 +17,7 @@
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../src/Counters.hpp"
-#include "../src/Statistics.hpp"
+#include "../src/Statistic.hpp"
#include "TestUtil.hpp"
#include "third_party/doctest.h"
diff --git a/unittest/test_Lockfile.cpp b/unittest/test_Lockfile.cpp
index 02e3672..7a01512 100644
--- a/unittest/test_Lockfile.cpp
+++ b/unittest/test_Lockfile.cpp
@@ -45,6 +45,15 @@ TEST_CASE("Lockfile acquire and release")
CHECK(!Stat::lstat("test.lock"));
}
+TEST_CASE("Lockfile creates missing directories")
+{
+ TestContext test_context;
+
+ Lockfile lock("a/b/c/test", 1000);
+ CHECK(lock.acquired());
+ CHECK(Stat::lstat("a/b/c/test.lock"));
+}
+
#ifndef _WIN32
TEST_CASE("Lockfile breaking")
{
diff --git a/unittest/test_Statistics.cpp b/unittest/test_Statistics.cpp
index 5d6892c..0e647fb 100644
--- a/unittest/test_Statistics.cpp
+++ b/unittest/test_Statistics.cpp
@@ -16,6 +16,7 @@
// this program; if not, write to the Free Software Foundation, Inc., 51
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#include "../src/Statistic.hpp"
#include "../src/Statistics.hpp"
#include "../src/Util.hpp"
#include "../src/fmtmacros.hpp"
diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp
index 917c137..5fd5239 100644
--- a/unittest/test_Util.cpp
+++ b/unittest/test_Util.cpp
@@ -146,6 +146,16 @@ TEST_CASE("Util::dir_name")
CHECK(Util::dir_name("/") == "/");
CHECK(Util::dir_name("/foo") == "/");
CHECK(Util::dir_name("/foo/bar/f.txt") == "/foo/bar");
+
+#ifdef _WIN32
+ CHECK(Util::dir_name("C:/x/y") == "C:/x");
+ CHECK(Util::dir_name("X:/x/y") == "X:/x");
+ CHECK(Util::dir_name("C:\\x\\y") == "C:\\x");
+ CHECK(Util::dir_name("C:/x") == "C:/");
+ CHECK(Util::dir_name("C:\\x") == "C:\\");
+ CHECK(Util::dir_name("C:/") == "C:/");
+ CHECK(Util::dir_name("C:\\") == "C:\\");
+#endif
}
TEST_CASE("Util::strip_ansi_csi_seqs")
@@ -366,43 +376,40 @@ TEST_CASE("Util::get_level_1_files")
Util::write_file("0/1/file_c", "12");
Util::write_file("0/f/c/file_d", "123");
- std::vector<std::shared_ptr<CacheFile>> files;
auto null_receiver = [](double) {};
SUBCASE("nonexistent subdirectory")
{
- Util::get_level_1_files("2", null_receiver, files);
+ const auto files = Util::get_level_1_files("2", null_receiver);
CHECK(files.empty());
}
SUBCASE("empty subdirectory")
{
- Util::get_level_1_files("e", null_receiver, files);
+ const auto files = Util::get_level_1_files("e", null_receiver);
CHECK(files.empty());
}
SUBCASE("simple case")
{
- Util::get_level_1_files("0", null_receiver, files);
+ auto files = Util::get_level_1_files("0", null_receiver);
REQUIRE(files.size() == 4);
// Files within a level are in arbitrary order, sort them to be able to
// verify them.
- std::sort(files.begin(),
- files.end(),
- [](const std::shared_ptr<CacheFile>& f1,
- const std::shared_ptr<CacheFile>& f2) {
- return f1->path() < f2->path();
- });
-
- CHECK(files[0]->path() == os_path("0/1/file_b"));
- CHECK(files[0]->lstat().size() == 1);
- CHECK(files[1]->path() == os_path("0/1/file_c"));
- CHECK(files[1]->lstat().size() == 2);
- CHECK(files[2]->path() == os_path("0/f/c/file_d"));
- CHECK(files[2]->lstat().size() == 3);
- CHECK(files[3]->path() == os_path("0/file_a"));
- CHECK(files[3]->lstat().size() == 0);
+ std::sort(
+ files.begin(), files.end(), [](const CacheFile& f1, const CacheFile& f2) {
+ return f1.path() < f2.path();
+ });
+
+ CHECK(files[0].path() == os_path("0/1/file_b"));
+ CHECK(files[0].lstat().size() == 1);
+ CHECK(files[1].path() == os_path("0/1/file_c"));
+ CHECK(files[1].lstat().size() == 2);
+ CHECK(files[2].path() == os_path("0/f/c/file_d"));
+ CHECK(files[2].lstat().size() == 3);
+ CHECK(files[3].path() == os_path("0/file_a"));
+ CHECK(files[3].lstat().size() == 0);
}
}
@@ -443,6 +450,31 @@ TEST_CASE("Util::get_path_in_cache")
== "/zz/ccache/A/B/C/D/EF.suffix");
}
+TEST_CASE("Util::hard_link")
+{
+ TestContext test_context;
+
+ SUBCASE("Link file to nonexistent destination")
+ {
+ Util::write_file("old", "content");
+ CHECK_NOTHROW(Util::hard_link("old", "new"));
+ CHECK(Util::read_file("new") == "content");
+ }
+
+ SUBCASE("Link file to existing destination")
+ {
+ Util::write_file("old", "content");
+ Util::write_file("new", "other content");
+ CHECK_NOTHROW(Util::hard_link("old", "new"));
+ CHECK(Util::read_file("new") == "content");
+ }
+
+ SUBCASE("Link nonexistent file")
+ {
+ CHECK_THROWS_AS(Util::hard_link("old", "new"), Error);
+ }
+}
+
TEST_CASE("Util::int_to_big_endian")
{
uint8_t bytes[8];
@@ -529,6 +561,69 @@ TEST_CASE("Util::is_dir_separator")
#endif
}
+TEST_CASE("Util::make_relative_path")
+{
+ using Util::make_relative_path;
+
+ const TestContext test_context;
+
+ const std::string cwd = Util::get_actual_cwd();
+ const std::string actual_cwd = FMT("{}/d", cwd);
+#ifdef _WIN32
+ const std::string apparent_cwd = actual_cwd;
+#else
+ const std::string apparent_cwd = FMT("{}/s", cwd);
+#endif
+
+ REQUIRE(Util::create_dir("d"));
+#ifndef _WIN32
+ REQUIRE(symlink("d", "s") == 0);
+#endif
+ REQUIRE(chdir("d") == 0);
+ Util::setenv("PWD", apparent_cwd);
+
+ SUBCASE("No base directory")
+ {
+ CHECK(make_relative_path("", "/a", "/a", "/a/x") == "/a/x");
+ }
+
+ SUBCASE("Path matches neither actual nor apparent CWD")
+ {
+#ifdef _WIN32
+ CHECK(make_relative_path("C:/", "C:/a", "C:/b", "C:/x") == "C:/x");
+#else
+ CHECK(make_relative_path("/", "/a", "/b", "/x") == "/x");
+#endif
+ }
+
+ SUBCASE("Match of actual CWD")
+ {
+#ifdef _WIN32
+ CHECK(
+ make_relative_path(
+ actual_cwd.substr(0, 3), actual_cwd, apparent_cwd, actual_cwd + "/x")
+ == "./x");
+#else
+ CHECK(make_relative_path("/", actual_cwd, apparent_cwd, actual_cwd + "/x")
+ == "./x");
+#endif
+ }
+
+#ifndef _WIN32
+ SUBCASE("Match of apparent CWD")
+ {
+ CHECK(make_relative_path("/", actual_cwd, apparent_cwd, apparent_cwd + "/x")
+ == "./x");
+ }
+
+ SUBCASE("Match if using resolved (using realpath(3)) path")
+ {
+ CHECK(make_relative_path("/", actual_cwd, actual_cwd, apparent_cwd + "/x")
+ == "./x");
+ }
+#endif
+}
+
TEST_CASE("Util::matches_dir_prefix_or_file")
{
CHECK(!Util::matches_dir_prefix_or_file("", ""));
diff --git a/unittest/test_argprocessing.cpp b/unittest/test_argprocessing.cpp
index 7e2cbca..7db1ad3 100644
--- a/unittest/test_argprocessing.cpp
+++ b/unittest/test_argprocessing.cpp
@@ -19,7 +19,7 @@
#include "../src/Args.hpp"
#include "../src/Config.hpp"
#include "../src/Context.hpp"
-#include "../src/Statistics.hpp"
+#include "../src/Statistic.hpp"
#include "../src/Util.hpp"
#include "../src/fmtmacros.hpp"
#include "TestUtil.hpp"
diff --git a/unittest/test_bsdmkstemp.cpp b/unittest/test_bsdmkstemp.cpp
new file mode 100644
index 0000000..021c73d
--- /dev/null
+++ b/unittest/test_bsdmkstemp.cpp
@@ -0,0 +1,206 @@
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "../src/Fd.hpp"
+#include "../src/Finalizer.hpp"
+#include "TestUtil.hpp"
+
+#include "third_party/doctest.h"
+#include "third_party/win32/mktemp.h"
+
+#include <algorithm>
+#include <memory>
+#include <ostream>
+#include <sddl.h>
+#include <utility>
+
+using TestUtil::TestContext;
+
+namespace {
+
+class ScopedHANDLE
+{
+public:
+ ScopedHANDLE() = default;
+
+ explicit ScopedHANDLE(HANDLE handle) : m_handle(handle)
+ {
+ }
+
+ ScopedHANDLE(ScopedHANDLE&& other) : ScopedHANDLE(other.release())
+ {
+ }
+
+ ~ScopedHANDLE()
+ {
+ if (m_handle != INVALID_HANDLE_VALUE) {
+ CloseHandle(m_handle);
+ }
+ }
+
+ ScopedHANDLE&
+ operator=(ScopedHANDLE rhs)
+ {
+ std::swap(m_handle, rhs.m_handle);
+ return *this;
+ }
+
+ explicit operator bool() const
+ {
+ return m_handle != INVALID_HANDLE_VALUE;
+ }
+
+ HANDLE
+ get() const
+ {
+ return m_handle;
+ }
+
+ HANDLE
+ release()
+ {
+ HANDLE handle = m_handle;
+ m_handle = INVALID_HANDLE_VALUE;
+ return handle;
+ }
+
+private:
+ HANDLE m_handle = INVALID_HANDLE_VALUE;
+};
+
+} // namespace
+
+TEST_SUITE_BEGIN("bsd_mkstemp");
+
+TEST_CASE("bsd_mkstemp")
+{
+ TestContext test_context;
+
+ static uint16_t rand_iter;
+ rand_iter = 0;
+
+ bsd_mkstemp_set_random_source([](void* buf, size_t nbytes) {
+ std::fill_n(
+ static_cast<uint16_t*>(buf), nbytes / sizeof(uint16_t), rand_iter);
+ ++rand_iter;
+ });
+
+ Finalizer reset_random_source([] { bsd_mkstemp_set_random_source(nullptr); });
+
+ SUBCASE("successful")
+ {
+ std::string path = "XXXXXX";
+ CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+ CHECK(path == "AAAAAA");
+ }
+
+ SUBCASE("existing file")
+ {
+ CHECK_MESSAGE(ScopedHANDLE(CreateFileA("AAAAAA",
+ GENERIC_READ | GENERIC_WRITE,
+ 0,
+ nullptr,
+ CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL,
+ nullptr)),
+ "errno=" << errno);
+
+ std::string path = "XXXXXX";
+ CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+ CHECK(path == "BBBBBB");
+ }
+
+ SUBCASE("existing file, pending delete")
+ {
+ ScopedHANDLE h;
+ CHECK_MESSAGE(
+ (h = ScopedHANDLE(CreateFileA("AAAAAA",
+ GENERIC_READ | GENERIC_WRITE | DELETE,
+ 0,
+ nullptr,
+ CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL,
+ nullptr))),
+ "errno=" << errno);
+
+ // Mark file as deleted. This puts it into a "pending delete" state that
+ // will persist until the handle is closed.
+ FILE_DISPOSITION_INFO info{};
+ info.DeleteFile = TRUE;
+ CHECK_MESSAGE(SetFileInformationByHandle(
+ h.get(), FileDispositionInfo, &info, sizeof(info)),
+ "errno=" << errno);
+
+ std::string path = "XXXXXX";
+ CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+ CHECK(path == "BBBBBB");
+ }
+
+ SUBCASE("existing directory")
+ {
+ CHECK_MESSAGE(CreateDirectoryA("AAAAAA", nullptr), "errno=" << errno);
+
+ std::string path = "XXXXXX";
+ CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+ CHECK(path == "BBBBBB");
+ }
+
+ SUBCASE("permission denied")
+ {
+ auto make_ACL = [](const char* acl_string) {
+ PSECURITY_DESCRIPTOR desc = nullptr;
+ ConvertStringSecurityDescriptorToSecurityDescriptorA(
+ acl_string, SDDL_REVISION_1, &desc, nullptr);
+ return std::shared_ptr<SECURITY_DESCRIPTOR>(
+ static_cast<SECURITY_DESCRIPTOR*>(desc), &LocalFree);
+ };
+
+ // Create a directory with a contrived ACL that denies creation of new files
+ // and directories to the "Everybody" (WD) group.
+ std::shared_ptr<SECURITY_DESCRIPTOR> desc;
+ CHECK_MESSAGE((desc = make_ACL("D:(D;;DCLCRPCR;;;WD)(A;;FA;;;WD)")),
+ "errno=" << errno);
+
+ SECURITY_ATTRIBUTES attrs{};
+ attrs.nLength = sizeof(attrs);
+ attrs.lpSecurityDescriptor = desc.get();
+ CHECK_MESSAGE(CreateDirectoryA("my_readonly_dir", &attrs),
+ "errno=" << errno);
+
+ // Sanity check that we cannot write to this directory. (E.g. Wine doesn't
+ // appear to emulate Windows ACLs properly when run under root.)
+ bool broken_acls = static_cast<bool>(ScopedHANDLE(
+ CreateFileA("my_readonly_dir/.writable",
+ GENERIC_WRITE,
+ 0,
+ nullptr,
+ CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE,
+ nullptr)));
+
+ if (!broken_acls) {
+ std::string path = "my_readonly_dir/XXXXXX";
+ CHECK(!Fd(bsd_mkstemp(&path[0])));
+ CHECK(errno == EACCES);
+ } else {
+ MESSAGE("ACLs do not appear to function properly on this filesystem");
+ }
+ }
+}
+
+TEST_SUITE_END();
diff --git a/unittest/test_ccache.cpp b/unittest/test_ccache.cpp
index e6cd6eb..cd59588 100644
--- a/unittest/test_ccache.cpp
+++ b/unittest/test_ccache.cpp
@@ -17,6 +17,7 @@
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../src/Context.hpp"
+#include "../src/Sloppiness.hpp"
#include "../src/ccache.hpp"
#include "../src/fmtmacros.hpp"
#include "TestUtil.hpp"