diff options
author | JinWang An <jinwang.an@samsung.com> | 2021-08-03 16:30:32 +0900 |
---|---|---|
committer | JinWang An <jinwang.an@samsung.com> | 2021-08-03 16:30:32 +0900 |
commit | 6b6c8464f9afa1913fc25a79ce78ac9a712f3723 (patch) | |
tree | 47eaacea5a0fb98c7a2b28c83fb897174bad5cdd | |
parent | 72bde18b29fe5c0badacd150129c413546aeaecb (diff) | |
download | ccache-6b6c8464f9afa1913fc25a79ce78ac9a712f3723.tar.gz ccache-6b6c8464f9afa1913fc25a79ce78ac9a712f3723.tar.bz2 ccache-6b6c8464f9afa1913fc25a79ce78ac9a712f3723.zip |
Imported Upstream version 4.2upstream/4.2
107 files changed, 12096 insertions, 1984 deletions
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index d5d0167..5d6c810 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,5 +1,5 @@ --- -name: Bug report +name: 🐞 Bug report about: Create a report to help us improve title: '' labels: bug diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..e587130 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: 🤔 Question or discussion + url: https://github.com/ccache/ccache/discussions + about: Please go to https://github.com/ccache/ccache/discussions diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 38887e1..b89d296 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,5 +1,5 @@ --- -name: Feature request +name: ✨ Feature request about: Suggest a new feature for this project title: '' labels: feature diff --git a/.github/ISSUE_TEMPLATE/improvement.md b/.github/ISSUE_TEMPLATE/improvement.md index de24208..542ec0e 100644 --- a/.github/ISSUE_TEMPLATE/improvement.md +++ b/.github/ISSUE_TEMPLATE/improvement.md @@ -1,5 +1,5 @@ --- -name: Improvement +name: 📈 Improvement about: Suggest an improvement that is neither a bug fix nor a new feature title: '' labels: improvement diff --git a/.github/ISSUE_TEMPLATE/support.md b/.github/ISSUE_TEMPLATE/support.md deleted file mode 100644 index 9d10686..0000000 --- a/.github/ISSUE_TEMPLATE/support.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Question -about: Ask for support or make an enquiry -title: '' -labels: support -assignees: '' - ---- -### Question ### -<!-- What do you want help with or know about? 
--> diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 705f1a7..c222a19 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -7,11 +7,15 @@ env: CTEST_OUTPUT_ON_FAILURE: ON VERBOSE: 1 +defaults: + run: + shell: bash + jobs: build_and_test: env: CMAKE_GENERATOR: Ninja - + name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.version }} runs-on: ${{ matrix.config.os }} strategy: @@ -46,15 +50,9 @@ jobs: compiler: gcc version: "10" - # Enable after https://github.com/ccache/ccache/pull/693 - # - os: ubuntu-16.04 - # compiler: clang - # version: "3.5" - - # Enable after https://github.com/ccache/ccache/pull/693 - # - os: ubuntu-16.04 - # compiler: clang - # version: "5.0" + - os: ubuntu-16.04 + compiler: clang + version: "5.0" - os: ubuntu-16.04 compiler: clang @@ -91,10 +89,14 @@ jobs: - name: Install dependencies run: | if [ "${{ runner.os }}" = "Linux" ]; then + sudo apt-get update + + # Install ld.gold (binutils) and ld.lld on different runs. + # Binding to Ubuntu 20 has no special meaning. 
if [ "${{ matrix.config.os }}" = "ubuntu-20.04" ]; then - sudo apt-get install -y ninja-build elfutils libzstd-dev + sudo apt-get install -y ninja-build elfutils libzstd-dev lld else - sudo apt-get install -y ninja-build elfutils libzstd1-dev + sudo apt-get install -y ninja-build elfutils libzstd1-dev binutils fi if [ "${{ matrix.config.compiler }}" = "gcc" ]; then @@ -106,11 +108,12 @@ jobs: echo "CC=clang-${{ matrix.config.version }}" >> $GITHUB_ENV echo "CXX=clang++-${{ matrix.config.version }}" >> $GITHUB_ENV - sudo apt update sudo apt install -y clang-${{ matrix.config.version }} g++-multilib fi elif [ "${{ runner.os }}" = "macOS" ]; then - brew install ninja + HOMEBREW_NO_AUTO_UPDATE=1 HOMEBREW_NO_INSTALL_CLEANUP=1 \ + brew install ninja + if [ "${{ matrix.config.compiler }}" = "gcc" ]; then brew install gcc@${{ matrix.config.version }} echo "CC=gcc-${{ matrix.config.version }}" >> $GITHUB_ENV @@ -139,7 +142,7 @@ jobs: if: failure() uses: actions/upload-artifact@v2 with: - name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-{{ matrix.config.version }}-testdir.tar.xz + name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.version }}-testdir.tar.xz path: testdir.tar.xz specific_tests: @@ -192,10 +195,32 @@ jobs: CC: x86_64-w64-mingw32-gcc-posix CXX: x86_64-w64-mingw32-g++-posix ENABLE_CACHE_CLEANUP_TESTS: 1 - CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DCMAKE_SYSTEM_NAME=Windows -DZSTD_FROM_INTERNET=ON -DSTATIC_LINK=ON + CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DCMAKE_SYSTEM_NAME=Windows -DZSTD_FROM_INTERNET=ON RUN_TESTS: unittest-in-wine apt_get: elfutils mingw-w64 wine + - name: Windows VS2019 32-bit + os: windows-2019 + msvc_arch: x64_x86 + allow_test_failures: true # For now, don't fail the build on failure + CC: cl + CXX: cl + ENABLE_CACHE_CLEANUP_TESTS: 1 + CMAKE_GENERATOR: Ninja + CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DZSTD_FROM_INTERNET=ON + TEST_CC: clang -target i686-pc-windows-msvc + + - name: Windows VS2019 64-bit + os: 
windows-2019 + msvc_arch: x64 + allow_test_failures: true # For now, don't fail the build on failure + CC: cl + CXX: cl + ENABLE_CACHE_CLEANUP_TESTS: 1 + CMAKE_GENERATOR: Ninja + CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DZSTD_FROM_INTERNET=ON + TEST_CC: clang -target x86_64-pc-windows-msvc + - name: Clang address & UB sanitizer os: ubuntu-20.04 CC: clang @@ -261,15 +286,40 @@ jobs: - name: Run apt-get if: matrix.config.apt_get != '' - run: sudo apt-get install ${{ matrix.config.apt_get }} + run: sudo apt-get update && sudo apt-get install ${{ matrix.config.apt_get }} + + - name: Prepare Windows environment (Visual Studio) + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1.5.0 + with: + arch: ${{ matrix.config.msvc_arch }} + + - name: Prepare Windows environment (Clang) + if: runner.os == 'Windows' + shell: powershell + run: | + $ErrorActionPreference = 'Stop' + + # The test suite currently requires that the compiler specified by the + # "CC" environment variable is on a path without spaces. Provide that + # by creating a junction from ~/opt/llvm to the Visual Studio path. + $null = New-Item ` + -Path "${HOME}\opt\llvm" ` + -ItemType Junction ` + -Target "${env:VCINSTALLDIR}\Tools\Llvm\x64" ` + -Force + "Path=${HOME}\opt\llvm\bin;${env:Path}" | ` + Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - name: Build and test + id: build-and-test env: ASAN_OPTIONS: ${{ matrix.config.ASAN_OPTIONS }} BUILDDIR: ${{ matrix.config.BUILDDIR }} CC: ${{ matrix.config.CC }} CCACHE_LOC: ${{ matrix.config.CCACHE_LOC }} CFLAGS: ${{ matrix.config.CFLAGS }} + CMAKE_GENERATOR: ${{ matrix.config.CMAKE_GENERATOR }} CMAKE_PARAMS: ${{ matrix.config.CMAKE_PARAMS }} CXX: ${{ matrix.config.CXX }} CXXFLAGS: ${{ matrix.config.CXXFLAGS }} @@ -278,15 +328,23 @@ jobs: LDFLAGS: ${{ matrix.config.LDFLAGS }} RUN_TESTS: ${{ matrix.config.RUN_TESTS }} SPECIAL: ${{ matrix.config.SPECIAL }} - run: ci/build + TEST_CC: ${{ matrix.config.TEST_CC }} + run: | + rc=0 + ci/build || rc=$? 
+ echo "::set-output name=exit_status::$rc" + exit $rc + # CTest exits with return code 8 on test failure. + continue-on-error: ${{ matrix.config.allow_test_failures == true && + steps.build-and-test.outputs.exit_status == 8 }} - name: Collect testdir from failed tests - if: failure() + if: failure() || steps.build-and-test.outcome == 'failure' run: ci/collect-testdir # TODO: in case of build-and-verify-*package the BUILDDIR is set within those scripts. - name: Upload testdir from failed tests - if: failure() + if: failure() || steps.build-and-test.outcome == 'failure' uses: actions/upload-artifact@v2 with: name: ${{ matrix.config.name }} - testdir.tar.xz @@ -314,7 +372,7 @@ jobs: uses: actions/checkout@v2 - name: Install codespell - run: sudo apt-get install codespell + run: sudo apt-get update && sudo apt-get install codespell - name: Run codespell run: codespell -q 7 -S ".git,LICENSE.adoc,./src/third_party/*" -I misc/codespell-allowlist.txt diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml new file mode 100644 index 0000000..972b6b9 --- /dev/null +++ b/.github/workflows/codeql-analysis.yaml @@ -0,0 +1,49 @@ +# More info: +# https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning + +name: "CodeQL" + +on: + push: + branches: ["*"] + pull_request: + # The branches below must be a subset of the branches above + branches: ["*"] + paths-ignore: + - '**/*.adoc' + - '**/*.bash' + - '**/*.md' + schedule: + # Full scan once a week + - cron: '0 14 * * 3' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-18.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + # We must fetch at least the immediate parents so that if this is + # a pull request then we can checkout the head. 
+ fetch-depth: 2 + + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install ninja-build elfutils libzstd1-dev + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: cpp + queries: +security-and-quality + + - name: Build + run: ci/build + env: + RUN_TESTS: none + CMAKE_GENERATOR: Ninja + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 @@ -11,6 +11,7 @@ Doug Anderson <dianders@disordat.com> Erik Flodin <erik@ejohansson.se> Hongli Lai <hongli@phusion.nl> Jonny Yu <yingshen.yu@gmail.com> +Ka Ho Ng <khng300@gmail.com> Kona Blend <kona8lend@gmail.com> Leanid Chaika <leanid.chaika@gmail.com> Luboš Luňák <l.lunak@centrum.cz> <l.lunak@suse.cz> diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b36dc3..40e21a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,11 @@ cmake_minimum_required(VERSION 3.4.3) -project(ccache LANGUAGES C CXX ASM) +project(ccache LANGUAGES C CXX) +if(MSVC) + enable_language(ASM_MASM) +else() + enable_language(ASM) +endif() set(CMAKE_PROJECT_DESCRIPTION "a fast C/C++ compiler cache") if(NOT "${CMAKE_CXX_STANDARD}") @@ -51,6 +56,17 @@ endif() # # Settings # +include(CcacheVersion) + +if("${CCACHE_VERSION_ORIGIN}" STREQUAL git OR DEFINED ENV{CI}) + set(CCACHE_DEV_MODE ON) +else() + set(CCACHE_DEV_MODE OFF) +endif() +message(STATUS "Ccache dev mode: ${CCACHE_DEV_MODE}") + +include(UseCcache) +include(UseFastestLinker) include(StandardSettings) include(StandardWarnings) include(CIBuildType) @@ -71,7 +87,7 @@ include(GNUInstallDirs) include(GenerateConfigurationFile) include(GenerateVersionFile) -if(HAVE_SYS_MMAN_H) +if(HAVE_SYS_MMAN_H AND HAVE_PTHREAD_MUTEXATTR_SETPSHARED) set(INODE_CACHE_SUPPORTED 1) endif() @@ -98,7 +114,7 @@ include(CodeAnalysis) option(ENABLE_TRACING "Enable possibility to use internal ccache tracing" OFF) if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU") - option(STATIC_LINK "Link statically with system libraries" OFF) + option(STATIC_LINK 
"Link statically with system libraries" ON) endif() # diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 46315c4..cb35bc0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,8 +6,8 @@ Want to contribute to ccache? Awesome! There are several options: -1. Ask a question in the [issue - tracker](https://github.com/ccache/ccache/issues/new/choose). +1. Ask a question in + [discussions](https://github.com/ccache/ccache/issues/discussions). 2. Post your question to the [mailing list](https://lists.samba.org/mailman/listinfo/ccache/). 3. Chat in the [Gitter room](https://gitter.im/ccache/ccache). @@ -39,8 +39,8 @@ Here are some hints to make the process smoother: avoid potentially wasting time on doing something that may need major rework to be accepted, or maybe doesn't end up being accepted at all. * Is your pull request "work in progress", i.e. you don't think that it's ready - for merging yet but you want early comments and CI test results? Then create - a draft pull request as described in [this Github blog + for merging yet but you want early comments and CI test results? Then create a + draft pull request as described in [this Github blog post](https://github.blog/2019-02-14-introducing-draft-pull-requests/). * Please follow the ccache's code style (see the section below). * Consider [A Note About Git Commit @@ -50,9 +50,9 @@ Here are some hints to make the process smoother: ## Code style Ccache was written in C99 until 2019 when it started being converted to C++11. -The conversion is a slow work in progress, which is why there is a lot of -C-style code left. Please refrain from doing large C to C++ conversions; do it -little by little. +The conversion is a slow work in progress, which is why there is some C-style +code left. Please refrain from doing large C to C++ conversions; do it little by +little. Source code formatting is defined by `.clang-format` in the root directory. 
The format is loosely based on [LLVM's code formatting @@ -60,14 +60,15 @@ style](https://llvm.org/docs/CodingStandards.html) with some exceptions. It's highly recommended to install [Clang-Format](https://clang.llvm.org/docs/ClangFormat.html) 6.0 or newer and run `make format` to format changes according to ccache's code style. Or even -better: set up your editor to run Clang-Format automatically when saving. If -you don't run Clang-Format then the ccache authors have to do it for you. +better: set up your editor to run Clang-Format automatically when saving. If you +don't run Clang-Format then the ccache authors have to do it for you. Please follow these conventions: * Use `UpperCamelCase` for types (e.g. classes and structs) and namespaces. * Use `UPPER_CASE` names for macros and (non-class )enum values. -* Use `snake_case` for other names (functions, variables, enum class values, etc.). +* Use `snake_case` for other names (functions, variables, enum class values, + etc.). * Use an `m_` prefix for non-public member variables. * Use a `g_` prefix for global mutable variables. * Use a `k_` prefix for global constants. diff --git a/LICENSE.adoc b/LICENSE.adoc index 9052403..cbce985 100644 --- a/LICENSE.adoc +++ b/LICENSE.adoc @@ -38,7 +38,7 @@ The copyright for ccache as a whole is as follows: ------------------------------------------------------------------------------- Copyright (C) 2002-2007 Andrew Tridgell -Copyright (C) 2009-2020 Joel Rosdahl and other contributors +Copyright (C) 2009-2021 Joel Rosdahl and other contributors ------------------------------------------------------------------------------- @@ -52,8 +52,8 @@ the GPL: that is, if separated from the ccache sources, they may be usable under less restrictive terms. -src/third_party/base32hex.[hc] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/third_party/base32hex.* +~~~~~~~~~~~~~~~~~~~~~~~~~~~ This base32hex implementation comes from <https://github.com/pmconrad/tinydnssec>. 
@@ -75,8 +75,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ------------------------------------------------------------------------------- -src/third_party/blake3/*.[hcS] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/third_party/blake3/blake3_* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is a subset of https://github.com/BLAKE3-team/BLAKE3[BLAKE3] 0.3.7 with the following license: @@ -421,12 +421,12 @@ src/third_party/doctest.h ~~~~~~~~~~~~~~~~~~~~~~~~~ This is the single header version of https://github.com/onqtam/doctest[doctest] -2.4.0 with the following license: +2.4.4 with the following license: ------------------------------------------------------------------------------- The MIT License (MIT) -Copyright (c) 2016-2019 Viktor Kirilov +Copyright (c) 2016-2020 Viktor Kirilov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -451,7 +451,7 @@ SOFTWARE. src/third_party/fmt/*.h and src/third_party/format.cpp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This is a subset of https://fmt.dev[fmt] 7.0.3 with the following license: +This is a subset of https://fmt.dev[fmt] 7.1.3 with the following license: ------------------------------------------------------------------------------- Formatting library for C++ @@ -485,8 +485,8 @@ without including the above copyright and permission notices. ------------------------------------------------------------------------------- -src/third_party/getopt_long.[hc] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/third_party/getopt_long.* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This implementation of `getopt_long()` was copied from https://www.postgresql.org[PostgreSQL] and has the following license text: @@ -524,8 +524,8 @@ SUCH DAMAGE. 
------------------------------------------------------------------------------- -src/third_party/minitrace.[hc] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/third_party/minitrace.* +~~~~~~~~~~~~~~~~~~~~~~~~~~~ A library for producing JSON traces suitable for Chrome's built-in trace viewer (chrome://tracing). Downloaded from <https://github.com/hrydgard/minitrace>. @@ -558,11 +558,13 @@ SOFTWARE. src/third_party/nonstd/optional.hpp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This alternative implementation of `std::optional` was downloaded from -<https://github.com/martinmoene/optional-lite> and has the following license -text: +This is the single header version of +https://github.com/martinmoene/optional-lite[optional-lite] 3.4.0 with the +following license: ------------------------------------------------------------------------------- +Copyright (c) 2014-2018 Martin Moene + Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization @@ -597,6 +599,8 @@ This alternative implementation of `std::string_view` was downloaded from text: ------------------------------------------------------------------------------- +Copyright 2017-2020 by Martin Moene + Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization @@ -623,8 +627,8 @@ DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -src/third_party/win32/getopt.[hc] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/third_party/win32/getopt.* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This implementation of `getopt_long()` for Win32 was taken from https://www.codeproject.com/Articles/157001/Full-getopt-Port-for-Unicode-and-Multibyte-Microso @@ -634,8 +638,32 @@ The full license text can be found in LGPL-3.0.txt and at https://www.gnu.org/licenses/lgpl-3.0.html. 
-src/third_party/xxh(ash|_x86dispatch).[hc] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/third_party/win32/mktemp.* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This implementation of `mkstemp()` for Win32 was adapted from +<https://github.com/openbsd/src/blob/99b791d14c0f1858d87a0c33b55880fb9b00be66/lib/libc/stdio/mktemp.c> +and has the folowing license text: + +------------------------------------------------------------------------------- +Copyright (c) 1996-1998, 2008 Theo de Raadt +Copyright (c) 1997, 2008-2009 Todd C. Miller + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +------------------------------------------------------------------------------- + +src/third_party/xxh* +~~~~~~~~~~~~~~~~~~~~ xxHash - Extremely Fast Hash algorithm. Copied from xxHash v0.8.0 downloaded from <https://github.com/Cyan4973/xxHash/releases>. 
@@ -31,6 +31,7 @@ Contributing to ccache * [Mailing list](https://lists.samba.org/mailman/listinfo/ccache/) * [Chat](https://gitter.im/ccache/ccache) * [Bug report info](https://ccache.dev/bugs.html) +* [Discussions](https://github.com/ccache/ccache/discussions) * [Issue tracker](https://github.com/ccache/ccache/issues) * [Help wanted!](https://github.com/ccache/ccache/labels/help%20wanted) * [Good first issues!](https://github.com/ccache/ccache/labels/good%20first%20issue) diff --git a/ci/build-and-verify-source-package b/ci/build-and-verify-source-package index 5a212c2..d74bc15 100755 --- a/ci/build-and-verify-source-package +++ b/ci/build-and-verify-source-package @@ -5,6 +5,9 @@ set -eu +# Unset CI variable to trigger ccache user build mode. +unset CI + # Ninja builds with relative paths so that ccache can be used to cache the build # without resorting to setting base_dir. export CMAKE_GENERATOR=Ninja diff --git a/cmake/CIBuildType.cmake b/cmake/CIBuildType.cmake index 963bc50..e721614 100644 --- a/cmake/CIBuildType.cmake +++ b/cmake/CIBuildType.cmake @@ -25,7 +25,7 @@ set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel CI." FORCE) -string(REPLACE -DNDEBUG "" CMAKE_CXX_FLAGS_CI ${CMAKE_CXX_FLAGS_CI}) -string(REPLACE -DNDEBUG "" CMAKE_C_FLAGS_CI ${CMAKE_C_FLAGS_CI}) +string(REGEX REPLACE "[/-]DNDEBUG" "" CMAKE_CXX_FLAGS_CI ${CMAKE_CXX_FLAGS_CI}) +string(REGEX REPLACE "[/-]DNDEBUG" "" CMAKE_C_FLAGS_CI ${CMAKE_C_FLAGS_CI}) string(STRIP ${CMAKE_CXX_FLAGS_CI} CMAKE_CXX_FLAGS_CI) string(STRIP ${CMAKE_C_FLAGS_CI} CMAKE_C_FLAGS_CI) diff --git a/cmake/CcachePackConfig.cmake b/cmake/CcachePackConfig.cmake index daaca30..a35949d 100644 --- a/cmake/CcachePackConfig.cmake +++ b/cmake/CcachePackConfig.cmake @@ -6,7 +6,7 @@ if(${CMAKE_VERSION} VERSION_LESS "3.9") endif() # From CcacheVersion.cmake. 
-set(CPACK_PACKAGE_VERSION ${VERSION}) +set(CPACK_PACKAGE_VERSION ${CCACHE_VERSION}) set(CPACK_VERBATIM_VARIABLES ON) @@ -18,7 +18,7 @@ endif() set( CPACK_PACKAGE_FILE_NAME - "ccache-${VERSION}-${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}" + "ccache-${CCACHE_VERSION}-${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}" ) include(CPack) diff --git a/cmake/CcacheVersion.cmake b/cmake/CcacheVersion.cmake index 0af8f03..386d6ca 100644 --- a/cmake/CcacheVersion.cmake +++ b/cmake/CcacheVersion.cmake @@ -1,3 +1,8 @@ +# This script sets two variables: +# +# - CCACHE_VERSION (version string) +# - CCACHE_VERSION_ORIGIN (archive or git) +# # There are three main scenarios: # # 1. Building from a source code archive generated by "git archive", e.g. the @@ -13,25 +18,32 @@ # 3. Building from a Git repository. In this case the version will be a proper # version if building a tagged commit, otherwise "branch.hash(+dirty)". In # case Git is not available, the version will be "unknown". +# +# CCACHE_VERSION_ORIGIN is set to "archive" in scenario 1 and "git" in scenario +# 3. -set(version_info "897b6065398b5e80402ae1c51a60a2cefc765ed1 HEAD, tag: v4.1, origin/master, origin/HEAD, master") +set(version_info "12ecd73fcd8aa7024d5851c1738223b8aff0c6e9 HEAD, tag: v4.2, origin/master, origin/HEAD, master") if(version_info MATCHES "^([0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f])[0-9a-f]* (.*)") # Scenario 1. + set(CCACHE_VERSION_ORIGIN archive) + set(hash "${CMAKE_MATCH_1}") set(ref_names "${CMAKE_MATCH_2}") if(ref_names MATCHES "tag: v([^,]+)") # Tagged commit. - set(VERSION "${CMAKE_MATCH_1}") + set(CCACHE_VERSION "${CMAKE_MATCH_1}") else() # Untagged commit. - set(VERSION "${hash}") + set(CCACHE_VERSION "${hash}") endif() elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git") # Scenario 3. 
+ set(CCACHE_VERSION_ORIGIN git) + find_package(Git QUIET) if(NOT GIT_FOUND) - set(VERSION "unknown") + set(CCACHE_VERSION "unknown") message(WARNING "Could not find git") else() macro(git) @@ -43,9 +55,9 @@ elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git") git(describe --abbrev=8 --dirty) if(git_stdout MATCHES "^v([^-]+)(-dirty)?$") - set(VERSION "${CMAKE_MATCH_1}") + set(CCACHE_VERSION "${CMAKE_MATCH_1}") if(NOT "${CMAKE_MATCH_2}" STREQUAL "") - set(VERSION "${VERSION}+dirty") + set(CCACHE_VERSION "${CCACHE_VERSION}+dirty") endif() elseif(git_stdout MATCHES "^v[^-]+-[0-9]+-g([0-9a-f]+)(-dirty)?$") set(hash "${CMAKE_MATCH_1}") @@ -55,12 +67,14 @@ elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git") git(rev-parse --abbrev-ref HEAD) set(branch "${git_stdout}") - set(VERSION "${branch}.${hash}${dirty}") + set(CCACHE_VERSION "${branch}.${hash}${dirty}") endif() # else: fail below endif() endif() -if(VERSION STREQUAL "") +if(CCACHE_VERSION STREQUAL "") # Scenario 2 or unexpected error. message(SEND_ERROR "Cannot determine Ccache version") endif() + +message(STATUS "Ccache version: ${CCACHE_VERSION}") diff --git a/cmake/CheckAsmCompilerFlag.cmake b/cmake/CheckAsmCompilerFlag.cmake deleted file mode 100644 index 07f5f8e..0000000 --- a/cmake/CheckAsmCompilerFlag.cmake +++ /dev/null @@ -1,62 +0,0 @@ -include(CMakeCheckCompilerFlagCommonPatterns) - -function(check_asm_compiler_flag flag var) - if(DEFINED "${var}") - return() - endif() - - set(locale_vars LC_ALL LC_MESSAGES LANG) - foreach(v IN LISTS locale_vars) - set(locale_vars_saved_${v} "$ENV{${v}}") - set(ENV{${v}} C) - endforeach() - - check_compiler_flag_common_patterns(common_patterns) - - set(test_file "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.S") - file(WRITE "${test_file}" ".global main\nmain:\n") - - if(NOT CMAKE_REQUIRED_QUIET) - message(STATUS "Performing Test ${var}") - endif() - try_compile( - ${var} - "${CMAKE_BINARY_DIR}" - "${test_file}" - COMPILE_DEFINITIONS "${flag}" - OUTPUT_VARIABLE output) - - 
check_compiler_flag_common_patterns(common_fail_patterns) - - foreach(regex ${common_fail_patterns}) - if("${output}" MATCHES "${regex}") - set(${var} 0) - endif() - endforeach() - - if(${${var}}) - set(${var} 1 CACHE INTERNAL "Test ${var}") - if(NOT CMAKE_REQUIRED_QUIET) - message(STATUS "Performing Test ${var} - Success") - endif() - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Performing ASM SOURCE FILE Test ${var} succeeded with the following output:\n" - "${output}\n" - "Source file was:\n${test_file}\n") - else() - if(NOT CMAKE_REQUIRED_QUIET) - message(STATUS "Performing Test ${var} - Failed") - endif() - set(${var} "" CACHE INTERNAL "Test ${var}") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log - "Performing ASM SOURCE FILE Test ${var} failed with the following output:\n" - "${output}\n" - "Source file was:\n${test_file}\n") - endif() - - foreach(v IN LISTS locale_vars) - set(ENV{${v}} ${locale_vars_saved_${v}}) - endforeach() - - set(${var} "${${var}}" PARENT_SCOPE) -endfunction() diff --git a/cmake/DefaultBuildType.cmake b/cmake/DefaultBuildType.cmake index 87b7647..630ecfb 100644 --- a/cmake/DefaultBuildType.cmake +++ b/cmake/DefaultBuildType.cmake @@ -6,7 +6,7 @@ endif() # Default to Release for end user builds (from source archive) and Debug for # development builds (in a Git repository). -if(EXISTS "${CMAKE_SOURCE_DIR}/.git") +if(CCACHE_DEV_MODE) set( CMAKE_BUILD_TYPE "Debug" CACHE STRING "Choose the type of build." FORCE) diff --git a/cmake/DevModeWarnings.cmake b/cmake/DevModeWarnings.cmake new file mode 100644 index 0000000..7ff5411 --- /dev/null +++ b/cmake/DevModeWarnings.cmake @@ -0,0 +1,136 @@ +include(CheckCXXCompilerFlag) + +# check_cxx_compiler_flag caches the result, so a unique variable name is +# required for every flag to be checked. +# +# Parameters: +# +# * flag [in], e.g. FLAG +# * var_name_of_var_name [in], e.g. "TEMP". This is the variable that "HAS_FLAG" +# will be written to. 
+function(generate_unique_has_flag_var_name flag var_name_of_var_name) + string(REGEX REPLACE "[=-]" "_" var_name "${flag}") + string(TOUPPER "${var_name}" var_name) + set(${var_name_of_var_name} "HAS_${var_name}" PARENT_SCOPE) +endfunction() + +macro(add_compile_flag_if_supported_ex varname flag alternative_flag) + # has_flag will contain "HAS_$flag" so each flag gets a unique HAS variable. + generate_unique_has_flag_var_name("${flag}" "has_flag") + + # Instead of passing "has_flag" this passes the content of has_flag. + check_cxx_compiler_flag("${flag}" "${has_flag}") + + if(${${has_flag}}) + list(APPEND "${varname}" "${flag}") + elseif("${alternative_flag}") + add_compile_flag_if_supported_ex("${varname}" ${alternative_flag} "") + endif() +endmacro() + +macro(add_compile_flag_if_supported varname flag) + add_compile_flag_if_supported_ex("${varname}" "${flag}" "") +endmacro() + +set( + _clang_gcc_warnings + -Wextra + -Wnon-virtual-dtor + -Wcast-align + -Wunused + -Woverloaded-virtual + -Wpedantic + + # Candidates for enabling in the future: + # -Wshadow + # -Wold-style-cast + # -Wconversion + # -Wsign-conversion + # -Wnull-dereference + # -Wformat=2 +) + +# Tested separately as this is not supported by Clang 3.4. +add_compile_flag_if_supported(_clang_gcc_warnings "-Wdouble-promotion") + +if(WARNINGS_AS_ERRORS) + list(APPEND _clang_gcc_warnings -Werror) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + list(APPEND CCACHE_COMPILER_WARNINGS ${_clang_gcc_warnings}) + + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) + list( + APPEND + CCACHE_COMPILER_WARNINGS + -Qunused-arguments + -Wno-error=unreachable-code + ) + endif() + + # If compiler supports -Wshadow-field-in-constructor, disable only that. + # Otherwise disable shadow. + add_compile_flag_if_supported_ex( + CCACHE_COMPILER_WARNINGS "-Wno-shadow-field-in-constructor" "-Wno-shadow") + + # Disable C++20 compatibility for now. 
+ add_compile_flag_if_supported(CCACHE_COMPILER_WARNINGS "-Wno-c++2a-compat") + + # If compiler supports these warnings they have to be disabled for now. + add_compile_flag_if_supported( + CCACHE_COMPILER_WARNINGS "-Wno-zero-as-null-pointer-constant") + add_compile_flag_if_supported( + CCACHE_COMPILER_WARNINGS "-Wno-undefined-func-template") + add_compile_flag_if_supported( + CCACHE_COMPILER_WARNINGS "-Wno-return-std-move-in-c++11") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list( + APPEND + CCACHE_COMPILER_WARNINGS + ${_clang_gcc_warnings} + + # Warn about logical operations being used where bitwise were probably + # wanted. + -Wlogical-op + + # Candidates for enabling in the future: + # -Wduplicated-cond + # -Wduplicated-branches + # -Wuseless-cast + ) + + # TODO: Exact version or reason unknown, discovered in Ubuntu 14 Docker test + # with GCC 4.8.4 + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.5) + add_compile_flag_if_supported( + CCACHE_COMPILER_WARNINGS "-Wno-missing-field-initializers") + add_compile_flag_if_supported( + CCACHE_COMPILER_WARNINGS "-Wno-unused-variable") + endif() +elseif(MSVC) + # Remove any warning level flags added by CMake. 
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REGEX REPLACE "/W[0-4]" "" CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}") + string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + if(WARNINGS_AS_ERRORS) + list(APPEND CCACHE_COMPILER_WARNINGS /WX) + endif() + + list( + APPEND + CCACHE_COMPILER_WARNINGS + /W4 + # Ignore bad macro in winbase.h triggered by /Zc:preprocessor: + /wd5105 + # Conversion warnings: + /wd4244 + /wd4245 + /wd4267 + # Assignment in conditional: + /wd4706 + # Non-underscore-prefixed POSIX functions: + /wd4996 + ) +endif() diff --git a/cmake/Findzstd.cmake b/cmake/Findzstd.cmake index 848348f..0044937 100644 --- a/cmake/Findzstd.cmake +++ b/cmake/Findzstd.cmake @@ -6,7 +6,7 @@ if(ZSTD_FROM_INTERNET) # Although ${zstd_FIND_VERSION} was requested, let's download a newer version. # Note: The directory structure has changed in 1.3.0; we only support 1.3.0 # and newer. - set(zstd_version "1.4.5") + set(zstd_version "1.4.8") set(zstd_url https://github.com/facebook/zstd/archive/v${zstd_version}.tar.gz) set(zstd_dir ${CMAKE_BINARY_DIR}/zstd-${zstd_version}) diff --git a/cmake/GenerateConfigurationFile.cmake b/cmake/GenerateConfigurationFile.cmake index a21861f..6e6b604 100644 --- a/cmake/GenerateConfigurationFile.cmake +++ b/cmake/GenerateConfigurationFile.cmake @@ -30,7 +30,6 @@ set(functions getopt_long getpwuid gettimeofday - mkstemp posix_fallocate realpath setenv @@ -57,6 +56,7 @@ check_c_source_compiles( } ]=] HAVE_PTHREAD_MUTEX_ROBUST) +check_function_exists(pthread_mutexattr_setpshared HAVE_PTHREAD_MUTEXATTR_SETPSHARED) set(CMAKE_REQUIRED_LINK_OPTIONS) include(CheckStructHasMember) @@ -67,18 +67,19 @@ check_struct_has_member("struct stat" st_mtim sys/stat.h check_struct_has_member("struct statfs" f_fstypename sys/mount.h HAVE_STRUCT_STATFS_F_FSTYPENAME) -include(CheckCXXCompilerFlag) - -# Old GCC versions don't have the required header support. 
-# Old Apple Clang versions seem to support -mavx2 but not the target -# attribute that's used to enable AVX2 for a certain function. -if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) - OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0)) - message(STATUS "Detected unsupported compiler for HAVE_AVX2 - disabled") - set(HAVE_AVX2 FALSE) -else() - check_cxx_compiler_flag(-mavx2 HAVE_AVX2) -endif() +include(CheckCXXSourceCompiles) +check_cxx_source_compiles( + [=[ + #include <immintrin.h> + void func() __attribute__((target("avx2"))); + void func() { _mm256_abs_epi8(_mm256_set1_epi32(42)); } + int main() + { + func(); + return 0; + } + ]=] + HAVE_AVX2) list(APPEND CMAKE_REQUIRED_LIBRARIES ws2_32) list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ws2_32) diff --git a/cmake/GenerateVersionFile.cmake b/cmake/GenerateVersionFile.cmake index 1517d44..7b2ad0c 100644 --- a/cmake/GenerateVersionFile.cmake +++ b/cmake/GenerateVersionFile.cmake @@ -1,6 +1,4 @@ -include(CcacheVersion) configure_file( ${CMAKE_SOURCE_DIR}/cmake/version.cpp.in ${CMAKE_BINARY_DIR}/src/version.cpp @ONLY) -message(STATUS "Ccache version: ${VERSION}") diff --git a/cmake/StandardWarnings.cmake b/cmake/StandardWarnings.cmake index 3507737..6a0ca71 100644 --- a/cmake/StandardWarnings.cmake +++ b/cmake/StandardWarnings.cmake @@ -2,161 +2,21 @@ # be linked privately by all product and test code, but not by third party code. add_library(standard_warnings INTERFACE) -if(IS_DIRECTORY "${CMAKE_SOURCE_DIR}/.git" OR DEFINED ENV{"CI"}) - # Enabled by default for development builds and CI builds. +if(CCACHE_DEV_MODE) + # Enabled by default for developer builds. option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" TRUE) else() - # Disabled by default for end user builds so compilation doesn't fail with new + # Disabled by default for user builds so compilation doesn't fail with new # compilers that may emit new warnings. 
option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" FALSE) endif() -include(CheckCXXCompilerFlag) - -# check_cxx_compiler_flag caches the result, so a unique variable name is -# required for every flag to be checked. -# -# Parameters: -# -# * flag [in], e.g. FLAG -# * var_name_of_var_name [in], e.g. "TEMP". This is the variable that "HAS_FLAG" -# will be written to. -function(generate_unique_has_flag_var_name flag var_name_of_var_name) - string(REGEX REPLACE "[=-]" "_" var_name "${flag}") - string(TOUPPER "${var_name}" var_name) - set(${var_name_of_var_name} "HAS_${var_name}" PARENT_SCOPE) -endfunction() - -function(add_target_compile_flag_if_supported_ex target flag alternative_flag) - # has_flag will contain "HAS_$flag" so each flag gets a unique HAS variable. - generate_unique_has_flag_var_name("${flag}" "has_flag") - - # Instead of passing "has_flag" this passes the content of has_flag. - check_cxx_compiler_flag("${flag}" "${has_flag}") - - if(${${has_flag}}) - target_compile_options(${target} INTERFACE "${flag}") - elseif("${alternative_flag}") - add_target_compile_flag_if_supported_ex(${target} ${alternative_flag} "") - endif() -endfunction() - -# TODO: Is there a better way to provide an optional third argument? -macro(add_target_compile_flag_if_supported target flag) - add_target_compile_flag_if_supported_ex("${target}" "${flag}" "") -endmacro() - -set(CLANG_GCC_WARNINGS - -Wall - -Wextra - -Wnon-virtual-dtor - -Wcast-align - -Wunused - -Woverloaded-virtual - -Wpedantic - - # Candidates for enabling in the future: - # -Wshadow - # -Wold-style-cast - # -Wconversion - # -Wsign-conversion - # -Wnull-dereference - # -Wformat=2 -) -# Tested separately as this is not supported by Clang 3.4. 
-add_target_compile_flag_if_supported(standard_warnings "-Wdouble-promotion") - -if(WARNINGS_AS_ERRORS) - set(CLANG_GCC_WARNINGS ${CLANG_GCC_WARNINGS} -Werror) +if(NOT MSVC) + set(CCACHE_COMPILER_WARNINGS -Wall) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) - set( - CLANG_GCC_WARNINGS - ${CLANG_GCC_WARNINGS} - -Qunused-arguments - -Wno-error=unreachable-code) - endif() - - target_compile_options( - standard_warnings - INTERFACE - ${CLANG_GCC_WARNINGS} - -Weverything - -Wno-c++98-compat-pedantic - -Wno-c++98-compat - -Wno-constexpr-not-const - -Wno-conversion - -Wno-disabled-macro-expansion - -Wno-documentation-unknown-command - -Wno-exit-time-destructors - -Wno-format-nonliteral - -Wno-global-constructors - -Wno-implicit-fallthrough - -Wno-padded - -Wno-shadow # Warnings in fmtlib - -Wno-shorten-64-to-32 - -Wno-sign-conversion - -Wno-signed-enum-bitfield # Warnings in fmtlib - -Wno-weak-vtables - -Wno-old-style-cast) - - # If compiler supports -Wshadow-field-in-constructor, disable only that. - # Otherwise disable shadow. - add_target_compile_flag_if_supported_ex( - standard_warnings "-Wno-shadow-field-in-constructor" "-Wno-shadow") - - # Disable C++20 compatibility for now. - add_target_compile_flag_if_supported(standard_warnings "-Wno-c++2a-compat") - - # If compiler supports these warnings they have to be disabled for now. - add_target_compile_flag_if_supported( - standard_warnings "-Wno-zero-as-null-pointer-constant") - add_target_compile_flag_if_supported( - standard_warnings "-Wno-undefined-func-template") - add_target_compile_flag_if_supported( - standard_warnings "-Wno-return-std-move-in-c++11") -elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - target_compile_options( - standard_warnings - INTERFACE ${CLANG_GCC_WARNINGS} - # Warn about logical operations being used where bitwise were probably - # wanted. 
- -Wlogical-op - - # Candidates for enabling in the future: - # -Wduplicated-cond - # -Wduplicated-branches - # -Wuseless-cast - ) - - # TODO: Exact version or reason unknown, discovered in Ubuntu 14 Docker test - # with GCC 4.8.4 - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.5) - add_target_compile_flag_if_supported( - standard_warnings "-Wno-missing-field-initializers") - add_target_compile_flag_if_supported( - standard_warnings "-Wno-unused-variable") - endif() -elseif(MSVC) - # Remove any warning level flags added by CMake. - string(REGEX REPLACE "/W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - string(REGEX REPLACE "/W[0-4]" "" CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}") - string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - - target_compile_options( - standard_warnings - INTERFACE - /W4 - # Ignore bad macro in winbase.h triggered by /Zc:preprocessor - /wd5105 - # Conversion warnings. - /wd4244 - /wd4267 - # Assignment in conditional. - /wd4706 - # Non-underscore-prefixed POSIX functions. - /wd4996 - ) +if(CCACHE_DEV_MODE) + include(DevModeWarnings) endif() + +target_compile_options(standard_warnings INTERFACE ${CCACHE_COMPILER_WARNINGS}) diff --git a/cmake/UseCcache.cmake b/cmake/UseCcache.cmake new file mode 100644 index 0000000..e89339a --- /dev/null +++ b/cmake/UseCcache.cmake @@ -0,0 +1,69 @@ +# Note: Compiling ccache via ccache is fine because the ccache version installed +# in the system is used. + +# Calls `message(VERBOSE msg)` if and only if VERBOSE is available (since CMake +# 3.15). Call CMake with --log-level=VERBOSE to view verbose messages. 
+function(message_verbose msg) + if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15") + message(VERBOSE ${msg}) + endif() +endfunction() + +function(use_ccache) + if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + message(WARNING "use_ccache() disabled, as it is not called from the project top level") + return() + endif() + + find_program(CCACHE_PROGRAM ccache) + if(NOT CCACHE_PROGRAM) + message_verbose("Ccache program not found, not enabling ccache for faster recompilation") + return() + endif() + + message_verbose("Ccache enabled for faster recompilation") + + # Note: This will override any config and environment settings. + set(ccache_env + # Another option would be CMAKE_BINARY_DIR, but currently only one base + # directory is supported. + CCACHE_BASEDIR=${CMAKE_SOURCE_DIR} + + # In case of very old ccache versions (pre 3.3). + CCACHE_CPP2=true + ) + + if(CMAKE_GENERATOR MATCHES "Ninja|Makefiles") + find_program(ENV_PROGRAM env) + if(ENV_PROGRAM) + set(env_program ${ENV_PROGRAM}) # faster than "cmake -E env" + else() + set(env_program ${CMAKE_COMMAND} -E env) + endif() + foreach(lang IN ITEMS C CXX OBJC OBJCXX CUDA) + set(CMAKE_${lang}_COMPILER_LAUNCHER + ${env_program} ${ccache_env} ${CCACHE_PROGRAM} + PARENT_SCOPE) + endforeach() + elseif(CMAKE_GENERATOR STREQUAL Xcode) + foreach(lang IN ITEMS C CXX) + set(launcher ${CMAKE_BINARY_DIR}/launch-${lang}) + file(WRITE ${launcher} "#!/bin/bash\n\n") + foreach(key_val IN LISTS ccache_env) + file(APPEND ${launcher} "export ${key_val}\n") + endforeach() + file(APPEND ${launcher} + "exec \"${CCACHE_PROGRAM}\" \"${CMAKE_${lang}_COMPILER}\" \"$@\"\n") + execute_process(COMMAND chmod a+rx ${launcher}) + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_CC ${CMAKE_BINARY_DIR}/launch-C PARENT_SCOPE) + set(CMAKE_XCODE_ATTRIBUTE_CXX ${CMAKE_BINARY_DIR}/launch-CXX PARENT_SCOPE) + set(CMAKE_XCODE_ATTRIBUTE_LD ${CMAKE_BINARY_DIR}/launch-C PARENT_SCOPE) + set(CMAKE_XCODE_ATTRIBUTE_LDPLUSPLUS ${CMAKE_BINARY_DIR}/launch-CXX PARENT_SCOPE) 
+ endif() +endfunction() + +option(USE_CCACHE "Use ccache to speed up recompilation time" TRUE) +if(USE_CCACHE) + use_ccache() +endif() diff --git a/cmake/UseFastestLinker.cmake b/cmake/UseFastestLinker.cmake new file mode 100644 index 0000000..c96639c --- /dev/null +++ b/cmake/UseFastestLinker.cmake @@ -0,0 +1,34 @@ +# Calls `message(VERBOSE msg)` if and only if VERBOSE is available (since CMake 3.15). +# Call CMake with --loglevel=VERBOSE to view those messages. +function(message_verbose msg) + if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15") + message(VERBOSE ${msg}) + endif() +endfunction() + +function(use_fastest_linker) + if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + message(WARNING "use_fastest_linker() disabled, as it is not called at the project top level") + return() + endif() + + find_program(FASTER_LINKER ld.lld) + if(NOT FASTER_LINKER) + find_program(FASTER_LINKER ld.gold) + endif() + + if(FASTER_LINKER) + # Note: Compiler flag -fuse-ld requires gcc 9 or clang 3.8. + # Instead override CMAKE_CXX_LINK_EXECUTABLE directly. + # By default CMake uses the compiler executable for linking. + set(CMAKE_CXX_LINK_EXECUTABLE "${FASTER_LINKER} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>") + message_verbose("Using ${FASTER_LINKER} linker for faster linking") + else() + message_verbose("Using default linker") + endif() +endfunction() + +option(USE_FASTER_LINKER "Use the lld or gold linker instead of the default for faster linking" TRUE) +if(USE_FASTER_LINKER) + use_fastest_linker() +endif() diff --git a/cmake/config.h.in b/cmake/config.h.in index 28a0706..b4e412f 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -27,11 +27,13 @@ // features on Mac OS X, so we need _DARWIN_C_SOURCE to re-enable them. #cmakedefine _DARWIN_C_SOURCE -// Define to activate features from IEEE Stds 1003.1-2001. +// Define to activate features from IEEE Stds 1003.1-2008. 
#define _POSIX_C_SOURCE 200809L #if defined(__SunOS_5_8) || defined(__SunOS_5_9) || defined(__SunOS_5_10) # define _XOPEN_SOURCE 500 +#elif defined(__FreeBSD__) +# define _XOPEN_SOURCE 700 #elif !defined(__SunOS_5_11) && !defined(__APPLE__) # define _XOPEN_SOURCE #endif @@ -83,12 +85,12 @@ // Define if the system has the type "long long". #cmakedefine HAVE_LONG_LONG -// Define if you have the "mkstemp" function. -#cmakedefine HAVE_MKSTEMP - // Define if you have the "posix_fallocate" function. #cmakedefine HAVE_POSIX_FALLOCATE +// Define if you have the "pthread_mutexattr_setpshared" function. +#cmakedefine HAVE_PTHREAD_MUTEXATTR_SETPSHARED + // Define if you have the <pwd.h> header file. #cmakedefine HAVE_PWD_H diff --git a/cmake/version.cpp.in b/cmake/version.cpp.in index 291f049..3b87746 100644 --- a/cmake/version.cpp.in +++ b/cmake/version.cpp.in @@ -1,2 +1,2 @@ extern const char CCACHE_VERSION[]; -const char CCACHE_VERSION[] = "@VERSION@"; +const char CCACHE_VERSION[] = "@CCACHE_VERSION@"; diff --git a/doc/AUTHORS.adoc b/doc/AUTHORS.adoc index eb15cef..dc3717c 100644 --- a/doc/AUTHORS.adoc +++ b/doc/AUTHORS.adoc @@ -18,6 +18,7 @@ Ccache is a collective work with contributions from many people, including: * Andrew Stubbs * Andrew Tridgell * Arne Hasselbring +* Azat Khuzhin * Bernhard Bauer * Björn Jacke * Breno Guimaraes @@ -54,7 +55,9 @@ Ccache is a collective work with contributions from many people, including: * Jørgen P.
Tjernø * Josh Soref * Justin Lebar +* Ka Ho Ng * Karl Chen +* Khem Raj * Kona Blend * Kovarththanan Rajaratnam * Lalit Chhabra @@ -82,6 +85,7 @@ Ccache is a collective work with contributions from many people, including: * Mizuha Himuraki * Mostyn Bramley-Moore * Neil Mushell +* Nicholas Hutchinson * Nick Schultz * Norbert Lange * Oded Shimon @@ -108,7 +112,9 @@ Ccache is a collective work with contributions from many people, including: * Ryan Brown * Ryan Egesdahl * Sam Gross +* Sergei Trofimovich * Steffen Dettmer +* Stuart Henderson * Sumit Jamgade * Thomas Otto * Thomas Röfer diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index b5c9f24..dda4703 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -2,7 +2,7 @@ find_program(ASCIIDOC_EXE asciidoc) mark_as_advanced(ASCIIDOC_EXE) # Don't show in CMake UIs if(NOT ASCIIDOC_EXE) - message(WARNING "Could not find asciidoc; documentation will not be generated") + message(NOTICE "Could not find asciidoc; documentation will not be generated") else() # # HTML documentation @@ -15,7 +15,7 @@ else() COMMAND ${ASCIIDOC_EXE} -o "${html_file}" - -a revnumber="${VERSION}" + -a revnumber="${CCACHE_VERSION}" -a toc -b xhtml11 "${CMAKE_SOURCE_DIR}/${adoc_file}" @@ -46,7 +46,7 @@ else() COMMAND ${ASCIIDOC_EXE} -o - - -a revnumber=${VERSION} + -a revnumber=${CCACHE_VERSION} -d manpage -b docbook "${CMAKE_SOURCE_DIR}/doc/MANUAL.adoc" | perl -pe 's!<literal>\(.*?\)</literal>!<emphasis role="strong">\\1</emphasis>!g' @@ -58,7 +58,7 @@ else() COMMAND ${A2X_EXE} --doctype manpage --format manpage MANUAL.xml MAIN_DEPENDENCY MANUAL.xml ) - add_custom_target(doc-man-page ALL DEPENDS ccache.1) + add_custom_target(doc-man-page DEPENDS ccache.1) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/ccache.1" DESTINATION "${CMAKE_INSTALL_MANDIR}/man1" @@ -66,5 +66,5 @@ else() set(doc_files "${doc_files}" ccache.1) endif() - add_custom_target(doc DEPENDS "${doc_files}") + add_custom_target(doc ALL DEPENDS "${doc_files}") endif() diff --git 
a/doc/INSTALL.md b/doc/INSTALL.md index 77c2a72..8a22618 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -20,6 +20,8 @@ To build ccache you need: from the Internet and unpack it in the local binary tree. Ccache will then be linked statically to the locally built libzstd. + To link libzstd statically you can use `-DZSTD_LIBRARY=/path/to/libzstd.a`. + Optional: - GNU Bourne Again SHell (bash) for tests. diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index b7d8912..98aae9a 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -63,8 +63,8 @@ is being used. WARNING: The technique of letting ccache masquerade as the compiler works well, but currently doesn't interact well with other tools that do the same thing. -See <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER -COMPILER WRAPPERS>>. +See _<<_using_ccache_with_other_compiler_wrappers,Using ccache with other +compiler wrappers>>_. WARNING: Use symbolic links for masquerading, not hard links. @@ -134,11 +134,11 @@ Common options *`-X`* _LEVEL_, *`--recompress`* _LEVEL_:: - Recompress the cache using compression level _LEVEL_. The level can be an - integer, with the same semantics as the - <<config_compression_level,*compression_level*>> configuration setting), or + Recompress the cache to level _LEVEL_ using the Zstandard algorithm. The + level can be an integer, with the same semantics as the + <<config_compression_level,*compression_level*>> configuration option, or the special value *uncompressed* for no compression. See - <<_cache_compression,CACHE COMPRESSION>> for more information. This can + _<<_cache_compression,Cache compression>>_ for more information. This can potentially take a long time since all files in the cache need to be visited. Only files that are currently compressed with a different level than _LEVEL_ will be recompressed. @@ -146,13 +146,13 @@ Common options *`-o`* _KEY=VALUE_, *`--set-config`* _KEY_=_VALUE_:: Set configuration option _KEY_ to _VALUE_.
See - <<_configuration,CONFIGURATION>> for more information. + _<<_configuration,Configuration>>_ for more information. *`-x`*, *`--show-compression`*:: - Print cache compression statistics. See <<_cache_compression,CACHE - COMPRESSION>> for more information. This can potentionally take a long time - since all files in the cache need to be visited. + Print cache compression statistics. See _<<_cache_compression,Cache + compression>>_ for more information. This can potentially take a long + time since all files in the cache need to be visited. *`-p`*, *`--show-config`*:: @@ -200,7 +200,7 @@ Options for scripting or debugging *`-k`* _KEY_, *`--get-config`* _KEY_:: Print the value of configuration option _KEY_. See - <<_configuration,CONFIGURATION>> for more information. + _<<_configuration,Configuration>>_ for more information. *`--hash-file`* _PATH_:: @@ -209,7 +209,7 @@ Options for scripting or debugging *`--print-stats`*:: - Print statistics counter IDs and corresponding values machine-parsable + Print statistics counter IDs and corresponding values in machine-parsable (tab-separated) format. @@ -242,11 +242,11 @@ compiler than what ccache thinks. Configuration ------------- -ccache's default behavior can be overridden by settings in configuration files, +ccache's default behavior can be overridden by options in configuration files, which in turn can be overridden by environment variables with names starting with *CCACHE_*. Ccache normally reads configuration from two files: first a system-level configuration file and secondly a cache-specific configuration -file. The priority of configuration settings is as follows (where 1 is +file. The priorities of configuration options are as follows (where 1 is highest): 1. Environment variables. @@ -268,13 +268,15 @@ The location of the primary (cache-specific) configuration is determined like this: 1. If *CCACHE_CONFIGPATH* is set, use that path. -2.
Otherwise, if <<config_cache_dir,*cache_dir*>> (*CCACHE_DIR*) is set then - use *<ccache_dir>/ccache.conf*. -3. Otherwise, if there is a legacy *$HOME/.ccache* directory then use +2. Otherwise, if the environment variable *CCACHE_DIR* is set then use + *$CCACHE_DIR/ccache.conf*. +3. Otherwise, if <<config_cache_dir,*cache_dir*>> is set in the secondary + (system-wide) configuration file then use *<cache_dir>/ccache.conf*. +4. Otherwise, if there is a legacy *$HOME/.ccache* directory then use *$HOME/.ccache/ccache.conf*. -4. Otherwise, if *XDG_CONFIG_HOME* is set then use +5. Otherwise, if *XDG_CONFIG_HOME* is set then use *$XDG_CONFIG_HOME/ccache/ccache.conf*. -5. Otherwise, use *%APPDATA%/ccache/ccache.conf* (Windows), +6. Otherwise, use *%APPDATA%/ccache/ccache.conf* (Windows), *$HOME/Library/Preferences/ccache/ccache.conf* (macOS) or *$HOME/.config/ccache/ccache.conf* (other systems). @@ -282,7 +284,7 @@ this: Configuration file syntax ~~~~~~~~~~~~~~~~~~~~~~~~~ -Configuration files are in a simple ``key = value'' format, one setting per +Configuration files are in a simple ``key = value'' format, one option per line. Lines starting with a hash sign are comments. Blank lines are ignored, as is whitespace surrounding keys and values. Example: @@ -331,9 +333,9 @@ option key. directory, but only absolute paths that begin with *base_dir*. Cache results can then be shared for compilations in different directories even if the project uses absolute paths in the compiler command line. See also - the discussion under <<_compiling_in_different_directories,COMPILING IN - DIFFERENT DIRECTORIES>>. If set to the empty string (which is the default), - no rewriting is done. + the discussion under _<<_compiling_in_different_directories,Compiling in + different directories>>_. If set to the empty string (which is the + default), no rewriting is done. + A typical path to use as *base_dir* is your home directory or another directory that is a parent of your project directories. 
Don't use `/` as the base @@ -383,16 +385,15 @@ project2 will be a different absolute path. [[config_cache_dir]] *cache_dir* (*CCACHE_DIR*):: This option specifies where ccache will keep its cached compiler outputs. - It will only take effect if set in the system-wide configuration file or as - an environment variable. The default is *$XDG_CACHE_HOME/ccache* if - *XDG_CACHE_HOME* is set, otherwise *$HOME/.cache/ccache*. Exception: If the - legacy directory *$HOME/.ccache* exists then that directory is the default. - - See also <<_location_of_the_primary_configuration_file,LOCATION OF THE - PRIMARY CONFIGURATION FILE>>. - - If you want to use another *CCACHE_DIR* value temporarily for one ccache - invocation you can use the `-d/--directory` command line option instead. + The default is *$XDG_CACHE_HOME/ccache* if *XDG_CACHE_HOME* is set, + otherwise *$HOME/.cache/ccache*. Exception: If the legacy directory + *$HOME/.ccache* exists then that directory is the default. ++ +See also _<<_location_of_the_primary_configuration_file,Location of the primary +configuration file>>_. ++ +If you want to use another *CCACHE_DIR* value temporarily for one ccache +invocation you can use the `-d/--directory` command line option instead. [[config_compiler]] *compiler* (*CCACHE_COMPILER* or (deprecated) *CCACHE_CC*):: @@ -422,8 +423,8 @@ project2 will be a different absolute path. don't affect code generation). You should only use *none* if you know what you are doing. *string:value*:: - Use *value* as the string to calculate hash from. This can be the compiler - revision number you retrieved earlier and set here via environment variable. + Hash *value*. This can for instance be a compiler revision number or + another string that the build system generates to identify the compiler. _a command string_:: Hash the standard output and standard error output of the specified command. 
The string will be split on whitespace to find out the command and @@ -453,10 +454,10 @@ compiler but another compiler wrapper -- in that case, the default *mtime* method will hash the mtime and size of the other compiler wrapper, which means that ccache won't be able to detect a compiler upgrade. Using a suitable command to identify the compiler is thus safer, but it's also slower, so you -should consider continue using the *mtime* method in combination with -the *prefix_command* option if possible. See -<<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER COMPILER -WRAPPERS>>. +should consider continuing to use the *mtime* method in combination with the +*prefix_command* option if possible. See +_<<_using_ccache_with_other_compiler_wrappers,Using ccache with other compiler +wrappers>>_. -- -- @@ -483,7 +484,7 @@ WRAPPERS>>. distcc's "pump" script. -- -[[config_compression]] *compression* (*CCACHE_COMPRESS* or *CCACHE_NOCOMPRESS*, see <<_boolean_values,Boolean values>> above):: +[[config_compression]] *compression* (*CCACHE_COMPRESS* or *CCACHE_NOCOMPRESS*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will compress data it puts in the cache. However, this option has no effect on how files are retrieved from the cache; compressed @@ -514,17 +515,24 @@ Semantics of *compression_level*: A positive value corresponds to normal Zstandard compression levels. Lower levels (e.g. *1*) mean faster compression but worse compression ratio. Higher levels (e.g. *19*) mean slower compression but better compression - ratio. The maximum possible value depends on the libzstd version. - Decompression speed is essentially the same for all levels. + ratio. The maximum possible value depends on the libzstd version, but at + least up to 19 is available for all versions. Decompression speed is + essentially the same for all levels. As a rule of thumb, use level 5 or + lower since higher levels may slow down compilations noticeably.
Higher + levels are however useful when recompressing the cache with command line + option *-X/--recompress*. *< 0*:: - A negative value corresponds to Zstandard's “ultra-fast” compression + A negative value corresponds to Zstandard's ``ultra-fast'' compression levels, which are even faster than level 1 but with less good compression - ratios. For instance, level *-3* corresponds to “--fast=3” for the *zstd* - command line tool. + ratios. For instance, level *-3* corresponds to ``--fast=3'' for the *zstd* + command line tool. In practice, there is little use for levels lower than + *-5* or so. *0* (default):: The value *0* means that ccache will choose a suitable level, currently *1*. -- ++ +See the http://zstd.net[Zstandard documentation] for more information. [[config_cpp_extension]] *cpp_extension* (*CCACHE_EXTENSION*):: @@ -534,24 +542,38 @@ Semantics of *compression_level*: compiled, but that sometimes doesn't work. For example, when using the ``aCC'' compiler on HP-UX, set the cpp extension to *i*. -[[config_debug]] *debug* (*CCACHE_DEBUG* or *CCACHE_NODEBUG*, see <<_boolean_values,Boolean values>> above):: +[[config_debug]] *debug* (*CCACHE_DEBUG* or *CCACHE_NODEBUG*, see _<<_boolean_values,Boolean values>>_ above):: If true, enable the debug mode. The debug mode creates per-object debug files that are helpful when debugging unexpected cache misses. Note however that ccache performance will be reduced slightly. See - <<_cache_debugging,debugging>> for more information. The default is false. + _<<_cache_debugging,Cache debugging>>_ for more information. The default is + false. -[[config_depend_mode]] *depend_mode* (*CCACHE_DEPEND* or *CCACHE_NODEPEND*, see <<_boolean_values,Boolean values>> above):: +[[config_debug_dir]] *debug_dir* (*CCACHE_DEBUGDIR*):: + + Specifies where to write per-object debug files if the _<<config_debug,debug + mode>>_ is enabled. If set to the empty string, the files will be written + next to the object file. 
If set to a directory, the debug files will be + written with full absolute paths in that directory, creating it if needed. + The default is the empty string. + + For example, if *debug_dir* is set to `/example`, the current working + directory is `/home/user` and the object file is `build/output.o` then the + debug log will be written to `/example/home/user/build/output.o.ccache-log`. + See also _<<_cache_debugging,Cache debugging>>_. + +[[config_depend_mode]] *depend_mode* (*CCACHE_DEPEND* or *CCACHE_NODEPEND*, see _<<_boolean_values,Boolean values>>_ above):: If true, the depend mode will be used. The default is false. See - <<_the_depend_mode,THE DEPEND MODE>>. + _<<_the_depend_mode,The depend mode>>_. -[[config_direct_mode]] *direct_mode* (*CCACHE_DIRECT* or *CCACHE_NODIRECT*, see <<_boolean_values,Boolean values>> above):: +[[config_direct_mode]] *direct_mode* (*CCACHE_DIRECT* or *CCACHE_NODIRECT*, see _<<_boolean_values,Boolean values>>_ above):: If true, the direct mode will be used. The default is true. See - <<_the_direct_mode,THE DIRECT MODE>>. + _<<_the_direct_mode,The direct mode>>_. -[[config_disable]] *disable* (*CCACHE_DISABLE* or *CCACHE_NODISABLE*, see <<_boolean_values,Boolean values>> above):: +[[config_disable]] *disable* (*CCACHE_DISABLE* or *CCACHE_NODISABLE*, see _<<_boolean_values,Boolean values>>_ above):: When true, ccache will just call the real compiler, bypassing the cache completely. The default is false. @@ -562,10 +584,10 @@ Semantics of *compression_level*: hash sum that identifies the build. The list separator is semicolon on Windows systems and colon on other systems. 
-[[config_file_clone]] *file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see <<_boolean_values,Boolean values>> above):: +[[config_file_clone]] *file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see _<<_boolean_values,Boolean values>>_ above):: - If true, ccache will attempt to use file cloning (also known as “copy on - write”, “CoW” or “reflinks”) to store and fetch cached compiler results. + If true, ccache will attempt to use file cloning (also known as ``copy on + write'', ``CoW'' or ``reflinks'') to store and fetch cached compiler results. *file_clone* has priority over <<config_hard_link,*hard_link*>>. The default is false. + @@ -578,7 +600,7 @@ safe to use, but not all file systems support the feature. For such file systems, ccache will fall back to use plain copying (or hard links if <<config_hard_link,*hard_link*>> is enabled). -[[config_hard_link]] *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see <<_boolean_values,Boolean values>> above):: +[[config_hard_link]] *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will attempt to use hard links to store and fetch cached object files. The default is false. @@ -608,7 +630,7 @@ WARNING: Do not enable this option unless you are aware of these caveats: *file.o* in build tree A as well. This can retrigger relinking in build tree A even though nothing really has changed. 
-[[config_hash_dir]] *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see <<_boolean_values,Boolean values>> above):: +[[config_hash_dir]] *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see _<<_boolean_values,Boolean values>>_ above):: If true (which is the default), ccache will include the current working directory (CWD) in the hash that is used to distinguish two compilations @@ -616,7 +638,8 @@ WARNING: Do not enable this option unless you are aware of these caveats: Exception: The CWD will not be included in the hash if <<config_base_dir,*base_dir*>> is set (and matches the CWD) and the compiler option *-fdebug-prefix-map* is used. See also the discussion under - <<_compiling_in_different_directories,COMPILING IN DIFFERENT DIRECTORIES>>. + _<<_compiling_in_different_directories,Compiling in different + directories>>_. + The reason for including the CWD in the hash by default is to prevent a problem with the storage of the current working directory in the debug info of an @@ -645,7 +668,7 @@ might be incorrect. example, `-fmessage-length=*` will match both `-fmessage-length=20` and `-fmessage-length=70`. -[[config_inode_cache]] *inode_cache* (*CCACHE_INODECACHE* or *CCACHE_NOINODECACHE*, see <<_boolean_values,Boolean values>> above):: +[[config_inode_cache]] *inode_cache* (*CCACHE_INODECACHE* or *CCACHE_NOINODECACHE*, see _<<_boolean_values,Boolean values>>_ above):: If true, enables caching of source file hashes based on device, inode and timestamps. This will reduce the time spent on hashing included files as @@ -656,7 +679,7 @@ available on Windows. + The feature requires *temporary_dir* to be located on a local filesystem. 
-[[config_keep_comments_cpp]] *keep_comments_cpp* (*CCACHE_COMMENTS* or *CCACHE_NOCOMMENTS*, see <<_boolean_values,Boolean values>> above):: +[[config_keep_comments_cpp]] *keep_comments_cpp* (*CCACHE_COMMENTS* or *CCACHE_NOCOMMENTS*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will not discard the comments before hashing preprocessor output. This can be used to check documentation with *-Wdocumentation*. @@ -665,7 +688,7 @@ The feature requires *temporary_dir* to be located on a local filesystem. Sets the limit when cleaning up. Files are deleted (in LRU order) until the levels are below the limit. The default is 0.8 (= 80%). See - <<_automatic_cleanup,AUTOMATIC CLEANUP>> for more information. + _<<_automatic_cleanup,Automatic cleanup>>_ for more information. [[config_log_file]] *log_file* (*CCACHE_LOGFILE*):: @@ -687,14 +710,14 @@ file in `/etc/rsyslog.d`: This option specifies the maximum number of files to keep in the cache. Use 0 for no limit (which is the default). See also - <<_cache_size_management,CACHE SIZE MANAGEMENT>>. + _<<_cache_size_management,Cache size management>>_. [[config_max_size]] *max_size* (*CCACHE_MAXSIZE*):: This option specifies the maximum size of the cache. Use 0 for no limit. The default value is 5G. Available suffixes: k, M, G, T (decimal) and Ki, Mi, Gi, Ti (binary). The default suffix is G. See also - <<_cache_size_management,CACHE SIZE MANAGEMENT>>. + _<<_cache_size_management,Cache size management>>_. [[config_path]] *path* (*CCACHE_PATH*):: @@ -704,7 +727,7 @@ file in `/etc/rsyslog.d`: matching the compiler name in the normal *PATH* that isn't a symbolic link to ccache itself. 
-[[config_pch_external_checksum]] *pch_external_checksum* (*CCACHE_PCH_EXTSUM* or *CCACHE_NOPCH_EXTSUM*, see <<_boolean_values,Boolean values>> above):: +[[config_pch_external_checksum]] *pch_external_checksum* (*CCACHE_PCH_EXTSUM* or *CCACHE_NOPCH_EXTSUM*, see _<<_boolean_values,Boolean values>>_ above):: When this option is set, and ccache finds a precompiled header file, ccache will look for a file with the extension ``.sum'' added @@ -716,15 +739,15 @@ file in `/etc/rsyslog.d`: This option adds a list of prefixes (separated by space) to the command line that ccache uses when invoking the compiler. See also - <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER - COMPILER WRAPPERS>>. + _<<_using_ccache_with_other_compiler_wrappers,Using ccache with other + compiler wrappers>>_. [[config_prefix_command_cpp]] *prefix_command_cpp* (*CCACHE_PREFIX_CPP*):: This option adds a list of prefixes (separated by space) to the command line that ccache uses when invoking the preprocessor. -[[config_read_only]] *read_only* (*CCACHE_READONLY* or *CCACHE_NOREADONLY*, see <<_boolean_values,Boolean values>> above):: +[[config_read_only]] *read_only* (*CCACHE_READONLY* or *CCACHE_NOREADONLY*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will attempt to use existing cached results, but it will not add new results to the cache. Statistics counters will still be updated, @@ -735,22 +758,22 @@ set <<config_temporary_dir,*temporary_dir*>> since ccache will fail to create temporary files otherwise. You may also want to set <<config_stats,*stats*>> to *false* make ccache not even try to update stats files. 
-[[config_read_only_direct]] *read_only_direct* (*CCACHE_READONLY_DIRECT* or *CCACHE_NOREADONLY_DIRECT*, see <<_boolean_values,Boolean values>> above):: +[[config_read_only_direct]] *read_only_direct* (*CCACHE_READONLY_DIRECT* or *CCACHE_NOREADONLY_DIRECT*, see _<<_boolean_values,Boolean values>>_ above):: Just like <<config_read_only,*read_only*>> except that ccache will only try to retrieve results from the cache using the direct mode, not the preprocessor mode. See documentation for <<config_read_only,*read_only*>> regarding using a read-only ccache directory. -[[config_recache]] *recache* (*CCACHE_RECACHE* or *CCACHE_NORECACHE*, see <<_boolean_values,Boolean values>> above):: +[[config_recache]] *recache* (*CCACHE_RECACHE* or *CCACHE_NORECACHE*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will not use any previously stored result. New results will still be cached, possibly overwriting any pre-existing results. -[[config_run_second_cpp]] *run_second_cpp* (*CCACHE_CPP2* or *CCACHE_NOCPP2*, see <<_boolean_values,Boolean values>> above):: +[[config_run_second_cpp]] *run_second_cpp* (*CCACHE_CPP2* or *CCACHE_NOCPP2*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will first run the preprocessor to preprocess the source - code (see <<_the_preprocessor_mode,THE PREPROCESSOR MODE>>) and then on a + code (see _<<_the_preprocessor_mode,The preprocessor mode>>_) and then on a cache miss run the compiler on the source code to get hold of the object file. This is the default. + @@ -792,11 +815,15 @@ still has to do _some_ preprocessing (like macros). Ignore ctimes when *file_stat_matches* is enabled. This can be useful when backdating files' mtimes in a controlled way. *include_file_ctime*:: - By default, ccache will not cache a file if it includes a header whose - ctime is too new. This sloppiness disables that check. + By default, ccache will not cache a file if it includes a header whose ctime + is too new. 
This sloppiness disables that check. See also + _<<_handling_of_newly_created_header_files,Handling of newly created header + files>>_. *include_file_mtime*:: By default, ccache will not cache a file if it includes a header whose - mtime is too new. This sloppiness disables that check. + mtime is too new. This sloppiness disables that check. See also + _<<_handling_of_newly_created_header_files,Handling of newly created header + files>>_. *locale*:: Ccache includes the environment variables *LANG*, *LC_ALL*, *LC_CTYPE* and *LC_MESSAGES* in the hash by default since they may affect localization of @@ -804,12 +831,12 @@ still has to do _some_ preprocessing (like macros). that. *pch_defines*:: Be sloppy about **#define**s when precompiling a header file. See - <<_precompiled_headers,PRECOMPILED HEADERS>> for more information. + _<<_precompiled_headers,Precompiled headers>>_ for more information. *modules*:: By default, ccache will not cache compilations if *-fmodules* is used since it cannot hash the state of compiler's internal representation of relevant modules. This sloppiness allows caching in such a case. See - <<_c_modules,C++ MODULES>> for more information. + _<<_c_modules,C++ modules>>_ for more information. *system_headers*:: By default, ccache will also include all system headers in the manifest. With this sloppiness set, ccache will only include system headers in the @@ -819,10 +846,10 @@ still has to do _some_ preprocessing (like macros). source code. -- + -See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more +See the discussion under _<<_troubleshooting,Troubleshooting>>_ for more information. -[[config_stats]] *stats* (*CCACHE_STATS* or *CCACHE_NOSTATS*, see <<_boolean_values,Boolean values>> above):: +[[config_stats]] *stats* (*CCACHE_STATS* or *CCACHE_NOSTATS*, see _<<_boolean_values,Boolean values>>_ above):: If true, ccache will update the statistics counters on each compilation. The default is true. 
@@ -900,11 +927,11 @@ Cache compression ----------------- Ccache will by default compress all data it puts into the cache using the -compression algorithm Zstandard (zstd) using compression level 1. The algorithm -is fast enough that there should be little reason to turn off compression to -gain performance. One exception is if the cache is located on a compressed file -system, in which case the compression performed by ccache of course is -redundant. See the documentation for the configuration options +compression algorithm http://zstd.net[Zstandard] (zstd) using compression level +1. The algorithm is fast enough that there should be little reason to turn off +compression to gain performance. One exception is if the cache is located on a +compressed file system, in which case the compression performed by ccache of +course is redundant. See the documentation for the configuration options <<config_compression,*compression*>> and <<config_compression_level,*compression_level*>> for more information. @@ -921,11 +948,11 @@ Incompressible data: 3.5 GB Notes: -* The “disk blocks” size is the cache size when taking disk block size into - account. This value should match the “cache size” value from “ccache - --show-stats”. The other size numbers refer to actual content sizes. -* “Compressed data” refers to result and manifest files stored in the cache. -* “Incompressible data” refers to files that are always stored uncompressed +* The ``disk blocks'' size is the cache size when taking disk block size into + account. This value should match the ``cache size'' value from ``ccache + --show-stats''. The other size numbers refer to actual content sizes. +* ``Compressed data'' refers to result and manifest files stored in the cache. +* ``Incompressible data'' refers to files that are always stored uncompressed (triggered by enabling <<config_file_clone,*file_clone*>> or <<config_hard_link,*hard_link*>>) or unknown files (for instance files created by older ccache versions). 
@@ -977,7 +1004,9 @@ No result was found. Current size of the cache. | called for link | -The compiler was called for linking, not compiling. +The compiler was called for linking, not compiling. Ccache only supports +compilation of a single file, i.e. calling the compiler with the *-c* option to +produce a single object file from a single source file. | called for preprocessing | The compiler was called for preprocessing, not compiling. @@ -1089,7 +1118,7 @@ overhead. If no previous result is detected (i.e., there is a cache miss) using the direct mode, ccache will fall back to the preprocessor mode unless the *depend mode* is enabled. In the depend mode, ccache never runs the preprocessor, not -even on cache misses. Read more in <<_the_depend_mode,THE DEPEND MODE>> +even on cache misses. Read more in _<<_the_depend_mode,The depend mode>>_ below. @@ -1202,6 +1231,34 @@ The depend mode will be disabled if any of the following holds: * The compiler is not generating dependencies using *-MD* or *-MMD*. +Handling of newly created header files +-------------------------------------- + +If modification time (mtime) or status change time (ctime) of one of the include +files is the same second as the time compilation is being done, ccache disables +the direct mode (or, in the case of a <<_precompiled_headers,precompiled +header>>, disables caching completely). This is done as a safety measure to avoid a +race condition (see below). + +To be able to use newly created header files in direct mode (or use a newly +precompiled header), either: + +* create the include file earlier in the build process, or +* set <<config_sloppiness,*sloppiness*>> to + *include_file_ctime,include_file_mtime* if you are willing to take the risk, + for instance if you know that your build system is robust enough not to + trigger the race condition. + +For reference, the race condition mentioned above consists of these events: + +1. The preprocessor is run. +2. An include file is modified by someone.
+3. The new include file is hashed by ccache. +4. The real compiler is run on the preprocessor's output, which contains data + from the old header file. +5. The wrong object file is stored in the cache. + + Cache debugging --------------- @@ -1233,16 +1290,20 @@ Log for this object file. |============================================================================== +If <<config_debug_dir,*debug_dir*>> (environment variable *CCACHE_DEBUGDIR*) is +set, the files above will be written to that directory with full absolute paths +instead of next to the object file. + In the direct mode, ccache uses the 160 bit BLAKE3 hash of the *ccache-input-c* + *ccache-input-d* data (where *+* means concatenation), while the *ccache-input-c* + *ccache-input-p* data is used in the preprocessor mode. The *ccache-input-text* file is a combined text version of the three -binary input files. It has three sections (“COMMON”, “DIRECT MODE” and -“PREPROCESSOR MODE”), which is turn contain annotations that say what kind of +binary input files. It has three sections (``COMMON'', ``DIRECT MODE'' and +``PREPROCESSOR MODE''), which in turn contain annotations that say what kind of data comes next. -To debug why you don’t get an expected cache hit for an object file, you can do +To debug why you don't get an expected cache hit for an object file, you can do something like this: 1. Build with debug mode enabled. @@ -1302,6 +1363,10 @@ things to make it work properly: `__TIMESTAMP__` is used when using a precompiled header. Further, it can't detect changes in **#define**s in the source code because of how preprocessing works in combination with precompiled headers. +* You may also want to include *include_file_mtime,include_file_ctime* in + <<config_sloppiness,*sloppiness*>>. See + _<<_handling_of_newly_created_header_files,Handling of newly created header + files>>_.
* You must either: + -- @@ -1429,7 +1494,7 @@ Caveats ------- * The direct mode fails to pick up new header files in some rare scenarios. See - <<_the_direct_mode,THE DIRECT MODE>> above. + _<<_the_direct_mode,The direct mode>>_ above. Troubleshooting @@ -1440,9 +1505,9 @@ General A general tip for getting information about what ccache is doing is to enable debug logging by setting the configuration option <<config_debug,*debug*>> (or -the environment variable *CCACHE_DEBUG*); see <<_cache_debugging,debugging>> -for more information. Another way of keeping track of what is happening is to -check the output of *ccache -s*. +the environment variable *CCACHE_DEBUG*); see _<<_cache_debugging,Cache +debugging>>_ for more information. Another way of keeping track of what is +happening is to check the output of *ccache -s*. Performance @@ -1472,15 +1537,10 @@ problems and what may be done to increase the hit rate: *-Wp,-MMD,_path_*, and *-Wp,-D_define_*) is used. ** This was the first compilation with a new value of the <<config_base_dir,base directory>>. -** A modification time of one of the include files is too new (created the same - second as the compilation is being done). This check is made to avoid a race - condition. To fix this, create the include file earlier in the build - process, if possible, or set <<config_sloppiness,*sloppiness*>> to - *include_file_ctime, include_file_mtime* if you are willing to take the risk. - (The race condition consists of these events: the preprocessor is run; an - include file is modified by someone; the new include file is hashed by - ccache; the real compiler is run on the preprocessor's output, which contains - data from the old header file; the wrong object file is stored in the cache.) +** A modification or status change time of one of the include files is too new + (created the same second as the compilation is being done). See + _<<_handling_of_newly_created_header_files,Handling of newly created header + files>>_. 
** The `__TIME__` preprocessor macro is (potentially) being used. Ccache turns off direct mode if `__TIME__` is present in the source code. This is done as a safety measure since the string indicates that a `__TIME__` macro _may_ @@ -1522,12 +1582,12 @@ problems and what may be done to increase the hit rate: * If ``unsupported compiler option'' has been incremented, enable debug logging and check which compiler option was rejected. * If ``preprocessor error'' has been incremented, one possible reason is that - precompiled headers are being used. See <<_precompiled_headers,PRECOMPILED - HEADERS>> for how to remedy this. + precompiled headers are being used. See _<<_precompiled_headers,Precompiled + headers>>_ for how to remedy this. * If ``can't use precompiled header'' has been incremented, see - <<_precompiled_headers,PRECOMPILED HEADERS>>. -* If ``can't use modules'' has been incremented, see - <<_c_modules,C++ MODULES>>. + _<<_precompiled_headers,Precompiled headers>>_. +* If ``can't use modules'' has been incremented, see _<<_c_modules,C++ + modules>>_. Corrupt object files diff --git a/doc/NEWS.adoc b/doc/NEWS.adoc index d3e4a9d..a4b3af8 100644 --- a/doc/NEWS.adoc +++ b/doc/NEWS.adoc @@ -1,6 +1,116 @@ Ccache news =========== +Ccache 4.2 +---------- +Release date: 2021-02-02 + +New features +~~~~~~~~~~~~ + +- Improved calculation of relative paths when using `base_dir` to also consider + canonical paths (i.e. paths with dereferenced symlinks) as candidates. + +- Added a `debug_dir` (`CCACHE_DEBUGDIR`) configuration setting for specifying a + directory for files written in debug mode. + +- Added support for compiler option `-x cuda`, understood by Clang. + +- The value of the `SOURCE_DATE_EPOCH` variable is now only hashed if it + potentially affects the output from ccache. 
This means that ccache now (like + before version 4.0) will be able to produce cache hits for source code that + doesn't contain `__DATE__` or `__TIME__` macros regardless of the value of + `SOURCE_DATE_EPOCH`. + + +Bug fixes +~~~~~~~~~ + +- Fixed a bug where a non-Clang compiler would silently accept the + Clang-specific `-f(no-)color-diagnostics` option when run via ccache. This + confused feature detection made by e.g. CMake. + +- Improved creation of temporary files on Windows. Previously, ccache would in + practice reuse temporary filenames on said platform resulting in various + problems with parallel builds. + +- Fixed creation of parent directories when creating a lock file on Windows. + +- Fixed a race condition related to removal of temporary files. + +- Improved calculation of directory name for a Windows-style path. + +- A compilation result is now not stored in the cache if an included + preprocessed header file is too new. This fixes a bug where the content of a + newly created preprocessed header file could be missing from the hash, + resulting in a false positive cache hit. + +- Fixed calculation of the split DWARF filename for an object filename with zero + or multiple dots. + +- Fixed retrieval of the object file when the destination is `/dev/null`. + + +Portability and build improvements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Additional compiler flags like `-Wextra -Werror` are now only added when + building ccache in developer mode. + +- The developer build mode no longer enables `-Weverything` for Clang. + +- `_XOPEN_SOURCE` is now defined appropriately on FreeBSD to fix missing + declaration of `isascii`. + +- Improved detection of buildability of BLAKE3 assembler files. + +- Disabled build of inode cache code on OSes without + `pthread_mutexattr_setpshared`, such as OpenBSD. + +- Made static linking the default for a Windows MinGW build. + +- Removed legacy fallback replacements of `mkstemp` and `realpath`.
+ +- Improved detection of SSE/AVX support. + +- Improved detection of support for the AVX2 target attribute. + +- Configuration scripts now try to detect and enable BLAKE3's Neon support. + +- Made it possible to run the integration test suite on macOS. + +- Fixed building of 32-bit unit tests on macOS. + +- Made it possible to compile ccache for C++17. + +- Fixed printing of 64-bit `time_t` on 32-bit architectures like RISCV32. + +- Made sure to only use ASCII characters in the manual's AsciiDoc source code to + make it possible to generate documentation in non-UTF8 locales. + +- Upgraded to optional-lite 3.4.0, fmt 7.1.3, doctest 2.4.4 and zstd 1.4.8. + +- Took steps towards being able to run the test suite on Windows. + + +Documentation +~~~~~~~~~~~~~ + +- Improved wording of `compiler_check` string values. + +- Improved documentation of compression levels and the `-X/--recompress` option. + +- Improved consistency of terms in the manual. + +- HTML documentation is now built and installed by default if possible. + +- Fixed incorrect documentation of configuration option `cache_dir`. + +- Added hint on how to link statically with libzstd. + +- Mention that ccache requires the `-c` compiler option. + + Ccache 4.1 ---------- Release date: 2020-11-22 diff --git a/misc/format-files b/misc/format-files index 1c0405b..111705a 100755 --- a/misc/format-files +++ b/misc/format-files @@ -46,7 +46,7 @@ for file in "$@"; do echo "Error: $file not formatted with Clang-Format" echo 'Run "make format" or apply this diff:' git diff $cf_color --no-index "$file" "$tmp_file" \ - | sed -r -e "s!^---.*!--- a/$file!" \ + | sed -E -e "s!^---.*!--- a/$file!" \ -e "s!^\+\+\+.*!+++ b/$file!" 
\ -e "/diff --/d" -e "/index /d" \ -e "s/.[0-9]*.clang-format.tmp//" diff --git a/src/.clang-tidy b/src/.clang-tidy index 0f3fa75..f30529d 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -13,14 +13,17 @@ Checks: '-*, -readability-implicit-bool-conversion, -readability-magic-numbers, -readability-else-after-return, + -readability-named-parameter, -readability-qualified-auto, - -readability-magic-numbers, + -readability-redundant-declaration, performance-*, -performance-unnecessary-value-param, modernize-*, -modernize-avoid-c-arrays, -modernize-pass-by-value, + -modernize-return-braced-init-list, -modernize-use-auto, + -modernize-use-default-member-init, -modernize-use-trailing-return-type, cppcoreguidelines-*, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, @@ -40,6 +43,8 @@ Checks: '-*, -cppcoreguidelines-pro-type-reinterpret-cast, -cppcoreguidelines-pro-type-union-access, -cppcoreguidelines-narrowing-conversions, + -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-special-member-functions, bugprone-*, -bugprone-signed-char-misuse, -bugprone-branch-clone, @@ -47,6 +52,7 @@ Checks: '-*, cert-*, -cert-err34-c, -cert-dcl50-cpp, + -cert-dcl58-cpp, -cert-err58-cpp, clang-diagnostic-*, clang-analyzer-*, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a88efc9..beefd81 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,6 +52,9 @@ if(WIN32) endif() add_library(ccache_lib STATIC ${source_files}) +target_compile_definitions( + ccache_lib PUBLIC -Dnssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_NONSTD +) if(WIN32) target_link_libraries(ccache_lib PRIVATE ws2_32 "psapi") @@ -74,6 +77,6 @@ target_link_libraries( PRIVATE standard_settings standard_warnings ZSTD::ZSTD Threads::Threads third_party_lib) -target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} .) 
+target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(third_party) diff --git a/src/CacheFile.hpp b/src/CacheFile.hpp index 59c180b..0541068 100644 --- a/src/CacheFile.hpp +++ b/src/CacheFile.hpp @@ -21,7 +21,6 @@ #include "system.hpp" #include "Stat.hpp" -#include "exceptions.hpp" #include "third_party/nonstd/optional.hpp" @@ -34,15 +33,12 @@ public: explicit CacheFile(const std::string& path); - CacheFile(const CacheFile&) = delete; - CacheFile& operator=(const CacheFile&) = delete; - const Stat& lstat() const; const std::string& path() const; Type type() const; private: - const std::string m_path; + std::string m_path; mutable nonstd::optional<Stat> m_stat; }; diff --git a/src/Config.cpp b/src/Config.cpp index a2e09ca..68dc3f5 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2020 Joel Rosdahl and other contributors +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. 
// @@ -20,9 +20,9 @@ #include "AtomicFile.hpp" #include "Compression.hpp" +#include "Sloppiness.hpp" #include "Util.hpp" #include "assertions.hpp" -#include "ccache.hpp" #include "exceptions.hpp" #include "fmtmacros.hpp" @@ -52,6 +52,7 @@ enum class ConfigItem { compression_level, cpp_extension, debug, + debug_dir, depend_mode, direct_mode, disable, @@ -92,6 +93,7 @@ const std::unordered_map<std::string, ConfigItem> k_config_key_table = { {"compression_level", ConfigItem::compression_level}, {"cpp_extension", ConfigItem::cpp_extension}, {"debug", ConfigItem::debug}, + {"debug_dir", ConfigItem::debug_dir}, {"depend_mode", ConfigItem::depend_mode}, {"direct_mode", ConfigItem::direct_mode}, {"disable", ConfigItem::disable}, @@ -133,6 +135,7 @@ const std::unordered_map<std::string, std::string> k_env_variable_table = { {"COMPRESSLEVEL", "compression_level"}, {"CPP2", "run_second_cpp"}, {"DEBUG", "debug"}, + {"DEBUGDIR", "debug_dir"}, {"DEPEND", "depend_mode"}, {"DIR", "cache_dir"}, {"DIRECT", "direct_mode"}, @@ -546,6 +549,9 @@ Config::get_string_value(const std::string& key) const case ConfigItem::debug: return format_bool(m_debug); + case ConfigItem::debug_dir: + return m_debug_dir; + case ConfigItem::depend_mode: return format_bool(m_depend_mode); @@ -756,6 +762,10 @@ Config::set_item(const std::string& key, m_debug = parse_bool(value, env_var_key, negate); break; + case ConfigItem::debug_dir: + m_debug_dir = value; + break; + case ConfigItem::depend_mode: m_depend_mode = parse_bool(value, env_var_key, negate); break; diff --git a/src/Config.hpp b/src/Config.hpp index a5945fa..eb574dc 100644 --- a/src/Config.hpp +++ b/src/Config.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2020 Joel Rosdahl and other contributors +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. 
// @@ -51,6 +51,7 @@ public: int8_t compression_level() const; const std::string& cpp_extension() const; bool debug() const; + const std::string& debug_dir() const; bool depend_mode() const; bool direct_mode() const; bool disable() const; @@ -83,7 +84,7 @@ public: void set_cache_dir(const std::string& value); void set_cpp_extension(const std::string& value); void set_compiler(const std::string& value); - void set_compiler_type(CompilerType compiler_type); + void set_compiler_type(CompilerType value); void set_depend_mode(bool value); void set_debug(bool value); void set_direct_mode(bool value); @@ -133,34 +134,35 @@ private: std::string m_secondary_config_path; bool m_absolute_paths_in_stderr = false; - std::string m_base_dir = ""; + std::string m_base_dir; std::string m_cache_dir; - std::string m_compiler = ""; + std::string m_compiler; std::string m_compiler_check = "mtime"; CompilerType m_compiler_type = CompilerType::auto_guess; bool m_compression = true; int8_t m_compression_level = 0; // Use default level - std::string m_cpp_extension = ""; + std::string m_cpp_extension; bool m_debug = false; + std::string m_debug_dir; bool m_depend_mode = false; bool m_direct_mode = true; bool m_disable = false; - std::string m_extra_files_to_hash = ""; + std::string m_extra_files_to_hash; bool m_file_clone = false; bool m_hard_link = false; bool m_hash_dir = true; - std::string m_ignore_headers_in_manifest = ""; - std::string m_ignore_options = ""; + std::string m_ignore_headers_in_manifest; + std::string m_ignore_options; bool m_inode_cache = false; bool m_keep_comments_cpp = false; double m_limit_multiple = 0.8; - std::string m_log_file = ""; + std::string m_log_file; uint64_t m_max_files = 0; uint64_t m_max_size = 5ULL * 1000 * 1000 * 1000; - std::string m_path = ""; + std::string m_path; bool m_pch_external_checksum = false; - std::string m_prefix_command = ""; - std::string m_prefix_command_cpp = ""; + std::string m_prefix_command; + std::string m_prefix_command_cpp; 
bool m_read_only = false; bool m_read_only_direct = false; bool m_recache = false; @@ -243,6 +245,12 @@ Config::debug() const return m_debug; } +inline const std::string& +Config::debug_dir() const +{ + return m_debug_dir; +} + inline bool Config::depend_mode() const { diff --git a/src/Context.cpp b/src/Context.cpp index 7706b7d..a7ce450 100644 --- a/src/Context.cpp +++ b/src/Context.cpp @@ -56,9 +56,10 @@ Context::register_pending_tmp_file(const std::string& path) void Context::unlink_pending_tmp_files_signal_safe() { - for (const std::string& path : m_pending_tmp_files) { + for (auto it = m_pending_tmp_files.rbegin(); it != m_pending_tmp_files.rend(); + ++it) { // Don't call Util::unlink_tmp since its log calls aren't signal safe. - unlink(path.c_str()); + unlink(it->c_str()); } // Don't clear m_pending_tmp_files since this method must be signal safe. } @@ -68,8 +69,9 @@ Context::unlink_pending_tmp_files() { SignalHandlerBlocker signal_handler_blocker; - for (const std::string& path : m_pending_tmp_files) { - Util::unlink_tmp(path, Util::UnlinkLog::ignore_failure); + for (auto it = m_pending_tmp_files.rbegin(); it != m_pending_tmp_files.rend(); + ++it) { + Util::unlink_tmp(*it, Util::UnlinkLog::ignore_failure); } m_pending_tmp_files.clear(); } diff --git a/src/Context.hpp b/src/Context.hpp index 7af0705..c021124 100644 --- a/src/Context.hpp +++ b/src/Context.hpp @@ -23,11 +23,12 @@ #include "Args.hpp" #include "ArgsInfo.hpp" #include "Config.hpp" +#include "Counters.hpp" #include "Digest.hpp" #include "File.hpp" #include "MiniTrace.hpp" #include "NonCopyable.hpp" -#include "ccache.hpp" +#include "Sloppiness.hpp" #ifdef INODE_CACHE_SUPPORTED # include "InodeCache.hpp" diff --git a/src/Counters.cpp b/src/Counters.cpp index 2e1b0e2..1263d9d 100644 --- a/src/Counters.cpp +++ b/src/Counters.cpp @@ -18,7 +18,7 @@ #include "Counters.hpp" -#include "Statistics.hpp" +#include "Statistic.hpp" #include "assertions.hpp" #include <algorithm> diff --git a/src/Depfile.hpp 
b/src/Depfile.hpp index 770f789..7250a4c 100644 --- a/src/Depfile.hpp +++ b/src/Depfile.hpp @@ -16,6 +16,8 @@ // this program; if not, write to the Free Software Foundation, Inc., 51 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#pragma once + class Context; class Hash; diff --git a/src/Hash.cpp b/src/Hash.cpp index ccc6f7b..61cc5a3 100644 --- a/src/Hash.cpp +++ b/src/Hash.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -106,7 +106,7 @@ bool Hash::hash_fd(int fd) { return Util::read_fd( - fd, [=](const void* data, size_t size) { hash(data, size); }); + fd, [this](const void* data, size_t size) { hash(data, size); }); } bool diff --git a/src/InodeCache.cpp b/src/InodeCache.cpp index a0e97a1..5e473ec 100644 --- a/src/InodeCache.cpp +++ b/src/InodeCache.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. 
// @@ -202,9 +202,13 @@ InodeCache::hash_inode(const std::string& path, return true; } -InodeCache::Bucket* -InodeCache::acquire_bucket(uint32_t index) +bool +InodeCache::with_bucket(const Digest& key_digest, + const BucketHandler& bucket_handler) { + uint32_t hash; + Util::big_endian_to_int(key_digest.bytes(), hash); + const uint32_t index = hash % k_num_buckets; Bucket* bucket = &m_sr->buckets[index]; int err = pthread_mutex_lock(&bucket->mt); #ifdef HAVE_PTHREAD_MUTEX_ROBUST @@ -217,7 +221,7 @@ InodeCache::acquire_bucket(uint32_t index) LOG( "Can't consolidate stale mutex at index {}: {}", index, strerror(err)); LOG_RAW("Consider removing the inode cache file if the problem persists"); - return nullptr; + return false; } LOG("Wiping bucket at index {} because of stale mutex", index); memset(bucket->entries, 0, sizeof(Bucket::entries)); @@ -227,26 +231,20 @@ InodeCache::acquire_bucket(uint32_t index) LOG("Failed to lock mutex at index {}: {}", index, strerror(err)); LOG_RAW("Consider removing the inode cache file if problem persists"); ++m_sr->errors; - return nullptr; + return false; } #ifdef HAVE_PTHREAD_MUTEX_ROBUST } #endif - return bucket; -} - -InodeCache::Bucket* -InodeCache::acquire_bucket(const Digest& key_digest) -{ - uint32_t hash; - Util::big_endian_to_int(key_digest.bytes(), hash); - return acquire_bucket(hash % k_num_buckets); -} -void -InodeCache::release_bucket(Bucket* bucket) -{ + try { + bucket_handler(bucket); + } catch (...) 
{ + pthread_mutex_unlock(&bucket->mt); + throw; + } pthread_mutex_unlock(&bucket->mt); + return true; } bool @@ -370,31 +368,28 @@ InodeCache::get(const std::string& path, return false; } - Bucket* bucket = acquire_bucket(key_digest); - - if (!bucket) { - return false; - } - bool found = false; - - for (uint32_t i = 0; i < k_num_entries; ++i) { - if (bucket->entries[i].key_digest == key_digest) { - if (i > 0) { - Entry tmp = bucket->entries[i]; - memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i); - bucket->entries[0] = tmp; - } - - file_digest = bucket->entries[0].file_digest; - if (return_value) { - *return_value = bucket->entries[0].return_value; + const bool success = with_bucket(key_digest, [&](Bucket* const bucket) { + for (uint32_t i = 0; i < k_num_entries; ++i) { + if (bucket->entries[i].key_digest == key_digest) { + if (i > 0) { + Entry tmp = bucket->entries[i]; + memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i); + bucket->entries[0] = tmp; + } + + file_digest = bucket->entries[0].file_digest; + if (return_value) { + *return_value = bucket->entries[0].return_value; + } + found = true; + break; } - found = true; - break; } + }); + if (!success) { + return false; } - release_bucket(bucket); LOG("inode cache {}: {}", found ? 
"hit" : "miss", path); @@ -404,7 +399,7 @@ InodeCache::get(const std::string& path, } else { ++m_sr->misses; } - LOG("accumulated stats for inode cache: hits={}, misses={}, errors={}", + LOG("Accumulated stats for inode cache: hits={}, misses={}, errors={}", m_sr->hits.load(), m_sr->misses.load(), m_sr->errors.load()); @@ -427,22 +422,20 @@ InodeCache::put(const std::string& path, return false; } - Bucket* bucket = acquire_bucket(key_digest); + const bool success = with_bucket(key_digest, [&](Bucket* const bucket) { + memmove(&bucket->entries[1], + &bucket->entries[0], + sizeof(Entry) * (k_num_entries - 1)); + + bucket->entries[0].key_digest = key_digest; + bucket->entries[0].file_digest = file_digest; + bucket->entries[0].return_value = return_value; + }); - if (!bucket) { + if (!success) { return false; } - memmove(&bucket->entries[1], - &bucket->entries[0], - sizeof(Entry) * (k_num_entries - 1)); - - bucket->entries[0].key_digest = key_digest; - bucket->entries[0].file_digest = file_digest; - bucket->entries[0].return_value = return_value; - - release_bucket(bucket); - LOG("inode cache insert: {}", path); return true; diff --git a/src/InodeCache.hpp b/src/InodeCache.hpp index 68d42ae..f2d049a 100644 --- a/src/InodeCache.hpp +++ b/src/InodeCache.hpp @@ -22,6 +22,7 @@ #include "config.h" +#include <functional> #include <string> class Config; @@ -94,13 +95,13 @@ private: struct Entry; struct Key; struct SharedRegion; + using BucketHandler = std::function<void(Bucket* bucket)>; bool mmap_file(const std::string& inode_cache_file); static bool hash_inode(const std::string& path, ContentType type, Digest& digest); - Bucket* acquire_bucket(uint32_t index); - Bucket* acquire_bucket(const Digest& key_digest); - static void release_bucket(Bucket* bucket); + bool with_bucket(const Digest& key_digest, + const BucketHandler& bucket_handler); static bool create_new_file(const std::string& filename); bool initialize(); diff --git a/src/Lockfile.cpp b/src/Lockfile.cpp index 
715ffa4..fb9d5e4 100644 --- a/src/Lockfile.cpp +++ b/src/Lockfile.cpp @@ -160,7 +160,7 @@ do_acquire_win32(const std::string& lockfile, uint32_t staleness_limit) error); if (error == ERROR_PATH_NOT_FOUND) { // Directory doesn't exist? - if (Util::create_dir(Util::dir_name(lockfile)) == 0) { + if (Util::create_dir(Util::dir_name(lockfile))) { // OK. Retry. continue; } diff --git a/src/Logging.cpp b/src/Logging.cpp index 9a5d99b..c6590d8 100644 --- a/src/Logging.cpp +++ b/src/Logging.cpp @@ -1,5 +1,5 @@ // Copyright (C) 2002 Andrew Tridgell -// Copyright (C) 2009-2020 Joel Rosdahl and other contributors +// Copyright (C) 2009-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -88,7 +88,10 @@ do_log(string_view message, bool bulk) if (tm) { strftime(timestamp, sizeof(timestamp), "%Y-%m-%dT%H:%M:%S", &*tm); } else { - snprintf(timestamp, sizeof(timestamp), "%lu", tv.tv_sec); + snprintf(timestamp, + sizeof(timestamp), + "%llu", + static_cast<long long unsigned int>(tv.tv_sec)); } snprintf(prefix, sizeof(prefix), diff --git a/src/Manifest.cpp b/src/Manifest.cpp index 9ee87dc..38aec0c 100644 --- a/src/Manifest.cpp +++ b/src/Manifest.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2009-2020 Joel Rosdahl and other contributors +// Copyright (C) 2009-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. 
// @@ -28,8 +28,8 @@ #include "File.hpp" #include "Hash.hpp" #include "Logging.hpp" +#include "Sloppiness.hpp" #include "StdMakeUnique.hpp" -#include "ccache.hpp" #include "fmtmacros.hpp" #include "hashutil.hpp" @@ -587,6 +587,8 @@ put(const Config& config, } catch (const Error& e) { LOG("Error: {}", e.what()); } + } else { + LOG_RAW("The entry already exists in the manifest, not adding"); } return false; } diff --git a/src/NonCopyable.hpp b/src/NonCopyable.hpp index 86004a9..37fe7e7 100644 --- a/src/NonCopyable.hpp +++ b/src/NonCopyable.hpp @@ -20,10 +20,10 @@ class NonCopyable { -protected: - NonCopyable() = default; - -private: +public: NonCopyable(const NonCopyable&) = delete; NonCopyable& operator=(const NonCopyable&) = delete; + +protected: + NonCopyable() = default; }; diff --git a/src/Result.cpp b/src/Result.cpp index ef8ac74..b20aa62 100644 --- a/src/Result.cpp +++ b/src/Result.cpp @@ -27,7 +27,7 @@ #include "File.hpp" #include "Logging.hpp" #include "Stat.hpp" -#include "Statistics.hpp" +#include "Statistic.hpp" #include "Util.hpp" #include "exceptions.hpp" #include "fmtmacros.hpp" diff --git a/src/ResultRetriever.cpp b/src/ResultRetriever.cpp index 77e044d..957bbee 100644 --- a/src/ResultRetriever.cpp +++ b/src/ResultRetriever.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -40,8 +40,13 @@ ResultRetriever::on_entry_start(uint32_t entry_number, uint64_t file_len, nonstd::optional<std::string> raw_file) { - std::string dest_path; + LOG("Reading {} entry #{} {} ({} bytes)", + raw_file ? 
"raw" : "embedded", + entry_number, + Result::file_type_to_string(file_type), + file_len); + std::string dest_path; m_dest_file_type = file_type; switch (file_type) { @@ -50,6 +55,8 @@ ResultRetriever::on_entry_start(uint32_t entry_number, break; case FileType::dependency: + // Dependency file: Open destination file but accumulate data in m_dest_data + // and write it in on_entry_end. if (m_ctx.args_info.generating_dependencies) { dest_path = m_ctx.args_info.output_dep; m_dest_data.reserve(file_len); @@ -57,8 +64,10 @@ ResultRetriever::on_entry_start(uint32_t entry_number, break; case FileType::stderr_output: + // Stderr data: Don't open a destination file. Instead accumulate it in + // m_dest_data and write it in on_entry_end. m_dest_data.reserve(file_len); - return; + break; case FileType::coverage_unmangled: if (m_ctx.args_info.generating_coverage) { @@ -92,46 +101,39 @@ ResultRetriever::on_entry_start(uint32_t entry_number, break; } - if (dest_path.empty()) { - LOG_RAW("Not copying"); + if (file_type == FileType::stderr_output) { + // Written in on_entry_end. + } else if (dest_path.empty()) { + LOG_RAW("Not writing"); } else if (dest_path == "/dev/null") { - LOG_RAW("Not copying to /dev/null"); + LOG_RAW("Not writing to /dev/null"); + } else if (raw_file) { + Util::clone_hard_link_or_copy_file(m_ctx, *raw_file, dest_path, false); + + // Update modification timestamp to save the file from LRU cleanup (and, if + // hard-linked, to make the object file newer than the source file). + Util::update_mtime(*raw_file); } else { - LOG("Retrieving {} file #{} {} ({} bytes)", - raw_file ? "raw" : "embedded", - entry_number, - Result::file_type_to_string(file_type), - file_len); - - if (raw_file) { - Util::clone_hard_link_or_copy_file(m_ctx, *raw_file, dest_path, false); - - // Update modification timestamp to save the file from LRU cleanup (and, - // if hard-linked, to make the object file newer than the source file). 
- Util::update_mtime(*raw_file); - } else { - LOG("Copying to {}", dest_path); - m_dest_fd = Fd( - open(dest_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666)); - if (!m_dest_fd) { - throw Error( - "Failed to open {} for writing: {}", dest_path, strerror(errno)); - } - m_dest_path = dest_path; + LOG("Writing to {}", dest_path); + m_dest_fd = Fd( + open(dest_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666)); + if (!m_dest_fd) { + throw Error( + "Failed to open {} for writing: {}", dest_path, strerror(errno)); } + m_dest_path = dest_path; } } void ResultRetriever::on_entry_data(const uint8_t* data, size_t size) { - ASSERT((m_dest_file_type == FileType::stderr_output && !m_dest_fd) - || (m_dest_file_type != FileType::stderr_output && m_dest_fd)); + ASSERT(!(m_dest_file_type == FileType::stderr_output && m_dest_fd)); if (m_dest_file_type == FileType::stderr_output || (m_dest_file_type == FileType::dependency && !m_dest_path.empty())) { m_dest_data.append(reinterpret_cast<const char*>(data), size); - } else { + } else if (m_dest_fd) { try { Util::write_fd(*m_dest_fd, data, size); } catch (Error& e) { @@ -144,6 +146,7 @@ void ResultRetriever::on_entry_end() { if (m_dest_file_type == FileType::stderr_output) { + LOG("Writing to file descriptor {}", STDERR_FILENO); Util::send_to_stderr(m_ctx, m_dest_data); } else if (m_dest_file_type == FileType::dependency && !m_dest_path.empty()) { write_dependency_file(); diff --git a/src/SignalHandler.cpp b/src/SignalHandler.cpp index 37604c2..8a07cc7 100644 --- a/src/SignalHandler.cpp +++ b/src/SignalHandler.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. 
// @@ -18,11 +18,12 @@ #include "SignalHandler.hpp" -#include "assertions.hpp" - #ifndef _WIN32 # include "Context.hpp" +# include "assertions.hpp" + +# include <signal.h> // NOLINT: sigaddset et al are defined in signal.h namespace { diff --git a/src/SignalHandler.hpp b/src/SignalHandler.hpp index 50e7b0e..3ef8847 100644 --- a/src/SignalHandler.hpp +++ b/src/SignalHandler.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -20,8 +20,6 @@ #include "system.hpp" -#include "signal.h" - class Context; class SignalHandler diff --git a/src/Sloppiness.hpp b/src/Sloppiness.hpp new file mode 100644 index 0000000..bd2078e --- /dev/null +++ b/src/Sloppiness.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +enum Sloppiness { + SLOPPY_INCLUDE_FILE_MTIME = 1 << 0, + SLOPPY_INCLUDE_FILE_CTIME = 1 << 1, + SLOPPY_TIME_MACROS = 1 << 2, + SLOPPY_PCH_DEFINES = 1 << 3, + // Allow us to match files based on their stats (size, mtime, ctime), without + // looking at their contents. 
+ SLOPPY_FILE_STAT_MATCHES = 1 << 4, + // Allow us to not include any system headers in the manifest include files, + // similar to -MM versus -M for dependencies. + SLOPPY_SYSTEM_HEADERS = 1 << 5, + // Allow us to ignore ctimes when comparing file stats, so we can fake mtimes + // if we want to (it is much harder to fake ctimes, requires changing clock) + SLOPPY_FILE_STAT_MATCHES_CTIME = 1 << 6, + // Allow us to not include the -index-store-path option in the manifest hash. + SLOPPY_CLANG_INDEX_STORE = 1 << 7, + // Ignore locale settings. + SLOPPY_LOCALE = 1 << 8, + // Allow caching even if -fmodules is used. + SLOPPY_MODULES = 1 << 9, +}; diff --git a/src/Statistic.hpp b/src/Statistic.hpp new file mode 100644 index 0000000..cd6cda6 --- /dev/null +++ b/src/Statistic.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +// Statistics fields in storage order. 
+enum class Statistic { + none = 0, + compiler_produced_stdout = 1, + compile_failed = 2, + internal_error = 3, + cache_miss = 4, + preprocessor_error = 5, + could_not_find_compiler = 6, + missing_cache_file = 7, + preprocessed_cache_hit = 8, + bad_compiler_arguments = 9, + called_for_link = 10, + files_in_cache = 11, + cache_size_kibibyte = 12, + obsolete_max_files = 13, + obsolete_max_size = 14, + unsupported_source_language = 15, + bad_output_file = 16, + no_input_file = 17, + multiple_source_files = 18, + autoconf_test = 19, + unsupported_compiler_option = 20, + output_to_stdout = 21, + direct_cache_hit = 22, + compiler_produced_no_output = 23, + compiler_produced_empty_output = 24, + error_hashing_extra_file = 25, + compiler_check_failed = 26, + could_not_use_precompiled_header = 27, + called_for_preprocessing = 28, + cleanups_performed = 29, + unsupported_code_directive = 30, + stats_zeroed_timestamp = 31, + could_not_use_modules = 32, + + END +}; diff --git a/src/Statistics.hpp b/src/Statistics.hpp index 61721b4..34a9982 100644 --- a/src/Statistics.hpp +++ b/src/Statistics.hpp @@ -21,6 +21,7 @@ #include "system.hpp" #include "Counters.hpp" +#include "Statistic.hpp" // Any reasonable use of Statistics requires the Statistic enum. #include "third_party/nonstd/optional.hpp" @@ -29,45 +30,6 @@ class Config; -// Statistics fields in storage order. 
-enum class Statistic { - none = 0, - compiler_produced_stdout = 1, - compile_failed = 2, - internal_error = 3, - cache_miss = 4, - preprocessor_error = 5, - could_not_find_compiler = 6, - missing_cache_file = 7, - preprocessed_cache_hit = 8, - bad_compiler_arguments = 9, - called_for_link = 10, - files_in_cache = 11, - cache_size_kibibyte = 12, - obsolete_max_files = 13, - obsolete_max_size = 14, - unsupported_source_language = 15, - bad_output_file = 16, - no_input_file = 17, - multiple_source_files = 18, - autoconf_test = 19, - unsupported_compiler_option = 20, - output_to_stdout = 21, - direct_cache_hit = 22, - compiler_produced_no_output = 23, - compiler_produced_empty_output = 24, - error_hashing_extra_file = 25, - compiler_check_failed = 26, - could_not_use_precompiled_header = 27, - called_for_preprocessing = 28, - cleanups_performed = 29, - unsupported_code_directive = 30, - stats_zeroed_timestamp = 31, - could_not_use_modules = 32, - - END -}; - namespace Statistics { // Read counters from `path`. No lock is acquired. diff --git a/src/TemporaryFile.cpp b/src/TemporaryFile.cpp index 0c64f5e..feaa5f1 100644 --- a/src/TemporaryFile.cpp +++ b/src/TemporaryFile.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -20,6 +20,10 @@ #include "Util.hpp" +#ifdef _WIN32 +# include "third_party/win32/mktemp.h" +#endif + using nonstd::string_view; namespace { @@ -39,30 +43,23 @@ get_umask() } #endif -#ifndef HAVE_MKSTEMP -// Cheap and nasty mkstemp replacement. 
-int -mkstemp(char* name_template) -{ -# ifdef __GNUC__ -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wdeprecated-declarations" -# endif - mktemp(name_template); -# ifdef __GNUC__ -# pragma GCC diagnostic pop -# endif - return open(name_template, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0600); -} -#endif - } // namespace TemporaryFile::TemporaryFile(string_view path_prefix) : path(std::string(path_prefix) + ".XXXXXX") { Util::ensure_dir_exists(Util::dir_name(path)); +#ifdef _WIN32 + // MSVC lacks mkstemp() and Mingw-w64's implementation[1] is problematic, as + // it can reuse the names of recently-deleted files unless the caller + // remembers to call srand(). + + // [1]: <https://github.com/Alexpux/mingw-w64/blob/ + // d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-crt/misc/mkstemp.c> + fd = Fd(bsd_mkstemp(&path[0])); +#else fd = Fd(mkstemp(&path[0])); +#endif if (!fd) { throw Fatal( "Failed to create temporary file for {}: {}", path, strerror(errno)); diff --git a/src/Util.cpp b/src/Util.cpp index bac6b7d..71de5f9 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2020 Joel Rosdahl and other contributors +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -290,16 +290,17 @@ clone_hard_link_or_copy_file(const Context& ctx, #endif } if (ctx.config.hard_link()) { - unlink(dest.c_str()); LOG("Hard linking {} to {}", source, dest); - int ret = link(source.c_str(), dest.c_str()); - if (ret == 0) { + try { + Util::hard_link(source, dest); if (chmod(dest.c_str(), 0444) != 0) { LOG("Failed to chmod: {}", strerror(errno)); } return; + } catch (const Error& e) { + LOG_RAW(e.what()); + // Fall back to copying. } - LOG("Failed to hard link: {}", strerror(errno)); } LOG("Copying {} to {}", source, dest); @@ -412,9 +413,19 @@ dir_name(string_view path) #endif size_t n = path.find_last_of(delim); if (n == std::string::npos) { + // "foo" -> "." 
return "."; + } else if (n == 0) { + // "/" -> "/" (Windows: or "\\" -> "\\") + return path.substr(0, 1); +#ifdef _WIN32 + } else if (n == 2 && path[1] == ':') { + // Windows: "C:\\foo" -> "C:\\" or "C:/foo" -> "C:/" + return path.substr(0, 3); +#endif } else { - return n == 0 ? "/" : path.substr(0, n); + // "/dir/foo" -> "/dir" (Windows: or "C:\\dir\\foo" -> "C:\\dir") + return path.substr(0, n); } } @@ -643,13 +654,14 @@ get_extension(string_view path) } } -void +std::vector<CacheFile> get_level_1_files(const std::string& dir, - const ProgressReceiver& progress_receiver, - std::vector<std::shared_ptr<CacheFile>>& files) + const ProgressReceiver& progress_receiver) { + std::vector<CacheFile> files; + if (!Stat::stat(dir)) { - return; + return files; } size_t level_2_directories = 0; @@ -661,7 +673,7 @@ get_level_1_files(const std::string& dir, } if (!is_dir) { - files.push_back(std::make_shared<CacheFile>(path)); + files.emplace_back(path); } else if (path != dir && path.find('/', dir.size() + 1) == std::string::npos) { ++level_2_directories; @@ -670,6 +682,7 @@ get_level_1_files(const std::string& dir, }); progress_receiver(1.0); + return files; } std::string @@ -777,6 +790,30 @@ get_path_in_cache(string_view cache_dir, uint8_t level, string_view name) return path; } +void +hard_link(const std::string& oldpath, const std::string& newpath) +{ + // Assumption: newpath may already exist as a left-over file from a previous + // run, but it's only we who can create the file entry now so we don't try to + // handle a race between unlink() and link() below. 
+ unlink(newpath.c_str()); + +#ifndef _WIN32 + if (link(oldpath.c_str(), newpath.c_str()) != 0) { + throw Error( + "failed to link {} to {}: {}", oldpath, newpath, strerror(errno)); + } +#else + if (!CreateHardLink(newpath.c_str(), oldpath.c_str(), nullptr)) { + DWORD error = GetLastError(); + throw Error("failed to link {} to {}: {}", + oldpath, + newpath, + Win32Util::error_message(error)); + } +#endif +} + bool is_absolute_path(string_view path) { @@ -833,10 +870,12 @@ localtime(optional<time_t> time) } std::string -make_relative_path(const Context& ctx, string_view path) +make_relative_path(const std::string& base_dir, + const std::string& actual_cwd, + const std::string& apparent_cwd, + nonstd::string_view path) { - if (ctx.config.base_dir().empty() - || !Util::starts_with(path, ctx.config.base_dir())) { + if (base_dir.empty() || !Util::starts_with(path, base_dir)) { return std::string(path); } @@ -858,28 +897,36 @@ make_relative_path(const Context& ctx, string_view path) // The algorithm for computing relative paths below only works for existing // paths. If the path doesn't exist, find the first ancestor directory that // does exist and assemble the path again afterwards. 
- string_view original_path = path; - std::string path_suffix; + + std::vector<std::string> relpath_candidates; + const auto original_path = path; Stat path_stat; while (!(path_stat = Stat::stat(std::string(path)))) { path = Util::dir_name(path); } - path_suffix = std::string(original_path.substr(path.length())); + const auto path_suffix = std::string(original_path.substr(path.length())); + const auto real_path = Util::real_path(std::string(path)); - std::string path_str(path); - std::string normalized_path = Util::normalize_absolute_path(path_str); - std::vector<std::string> relpath_candidates = { - Util::get_relative_path(ctx.actual_cwd, normalized_path), - }; - if (ctx.apparent_cwd != ctx.actual_cwd) { - relpath_candidates.emplace_back( - Util::get_relative_path(ctx.apparent_cwd, normalized_path)); - // Move best (= shortest) match first: - if (relpath_candidates[0].length() > relpath_candidates[1].length()) { - std::swap(relpath_candidates[0], relpath_candidates[1]); + const auto add_relpath_candidates = [&](nonstd::string_view path) { + const std::string normalized_path = Util::normalize_absolute_path(path); + relpath_candidates.push_back( + Util::get_relative_path(actual_cwd, normalized_path)); + if (apparent_cwd != actual_cwd) { + relpath_candidates.emplace_back( + Util::get_relative_path(apparent_cwd, normalized_path)); } + }; + add_relpath_candidates(path); + if (real_path != path) { + add_relpath_candidates(real_path); } + // Find best (i.e. 
shortest existing) match: + std::sort(relpath_candidates.begin(), + relpath_candidates.end(), + [](const std::string& path1, const std::string& path2) { + return path1.length() < path2.length(); + }); for (const auto& relpath : relpath_candidates) { if (Stat::stat(relpath).same_inode_as(path_stat)) { return relpath + path_suffix; @@ -890,6 +937,13 @@ make_relative_path(const Context& ctx, string_view path) return std::string(original_path); } +std::string +make_relative_path(const Context& ctx, string_view path) +{ + return make_relative_path( + ctx.config.base_dir(), ctx.actual_cwd, ctx.apparent_cwd, path); +} + bool matches_dir_prefix_or_file(string_view dir_prefix_or_file, string_view path) { @@ -1201,15 +1255,7 @@ real_path(const std::string& path, bool return_empty_on_error) resolved = buffer; } #else - // Yes, there are such systems. This replacement relies on the fact that when - // we call x_realpath we only care about symlinks. - { - ssize_t len = readlink(path.c_str(), buffer, buffer_size - 1); - if (len != -1) { - buffer[len] = 0; - resolved = buffer; - } - } +# error No realpath function available #endif return resolved ? resolved : (return_empty_on_error ? "" : path); diff --git a/src/Util.hpp b/src/Util.hpp index 3fbab45..7db8d95 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -208,10 +208,9 @@ nonstd::string_view get_extension(nonstd::string_view path); // Parameters: // - dir: The directory to traverse recursively. // - progress_receiver: Function that will be called for progress updates. -// - files: Found files. -void get_level_1_files(const std::string& dir, - const ProgressReceiver& progress_receiver, - std::vector<std::shared_ptr<CacheFile>>& files); +std::vector<CacheFile> +get_level_1_files(const std::string& dir, + const ProgressReceiver& progress_receiver); // Return the current user's home directory, or throw `Fatal` if it can't // be determined. 
@@ -234,6 +233,9 @@ std::string get_path_in_cache(nonstd::string_view cache_dir, uint8_t level, nonstd::string_view name); +// Hard-link `oldpath` to `newpath`. Throws `Error` on error. +void hard_link(const std::string& oldpath, const std::string& newpath); + // Write bytes in big endian order from an integer value. // // Parameters: @@ -306,8 +308,14 @@ bool is_precompiled_header(nonstd::string_view path); // time of day is used. nonstd::optional<tm> localtime(nonstd::optional<time_t> time = {}); -// Make a relative path from current working directory to `path` if `path` is -// under the base directory. +// Make a relative path from current working directory (either `actual_cwd` or +// `apparent_cwd`) to `path` if `path` is under `base_dir`. +std::string make_relative_path(const std::string& base_dir, + const std::string& actual_cwd, + const std::string& apparent_cwd, + nonstd::string_view path); + +// Like above but with base directory and apparent/actual CWD taken from `ctx`. std::string make_relative_path(const Context& ctx, nonstd::string_view path); // Return whether `path` is equal to `dir_prefix_or_file` or if diff --git a/src/argprocessing.cpp b/src/argprocessing.cpp index 1783f9f..612eadd 100644 --- a/src/argprocessing.cpp +++ b/src/argprocessing.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -48,7 +48,6 @@ struct ArgumentProcessingState bool found_rewrite_includes = false; std::string explicit_language; // As specified with -x. - std::string file_language; // As deduced from file extension. std::string input_charset_option; // -finput-charset=... // Is the dependency makefile name overridden with -MF? 
@@ -688,6 +687,19 @@ process_arg(Context& ctx, return nullopt; } + if (config.compiler_type() != CompilerType::clang + && (args[i] == "-fcolor-diagnostics" + || args[i] == "-fno-color-diagnostics")) { + // Special case: If a non-Clang compiler gets -f(no-)color-diagnostics we'll + // bail out and just execute the compiler. The reason is that we don't + // include -f(no-)color-diagnostics in the hash so there can be a false + // cache hit in the following scenario: + // + // 1. ccache gcc -c example.c # adds a cache entry + // 2. ccache gcc -c example.c -fcolor-diagnostics # unexpectedly succeeds + return Statistic::unsupported_compiler_option; + } + if (args[i] == "-fcolor-diagnostics" || args[i] == "-fdiagnostics-color" || args[i] == "-fdiagnostics-color=always") { state.color_diagnostics = ColorDiagnostics::always; @@ -841,7 +853,7 @@ process_arg(Context& ctx, } if (!args_info.input_file.empty()) { - if (!language_for_file(args[i]).empty()) { + if (supported_source_extension(args[i])) { LOG("Multiple input files: {} and {}", args_info.input_file, args[i]); return Statistic::multiple_source_files; } else if (!state.found_c_opt && !state.found_dc_opt) { @@ -980,7 +992,6 @@ process_args(Context& ctx) if (!state.explicit_language.empty() && state.explicit_language == "none") { state.explicit_language.clear(); } - state.file_language = language_for_file(args_info.input_file); if (!state.explicit_language.empty()) { if (!language_is_supported(state.explicit_language)) { LOG("Unsupported language: {}", state.explicit_language); @@ -988,7 +999,8 @@ process_args(Context& ctx) } args_info.actual_language = state.explicit_language; } else { - args_info.actual_language = state.file_language; + args_info.actual_language = + language_for_file(args_info.input_file, config.compiler_type()); } args_info.output_is_precompiled_header = @@ -1021,8 +1033,11 @@ process_args(Context& ctx) return Statistic::unsupported_source_language; } - if (!config.run_second_cpp() && 
args_info.actual_language == "cu") { - LOG_RAW("Using CUDA compiler; not compiling preprocessed code"); + if (!config.run_second_cpp() + && (args_info.actual_language == "cu" + || args_info.actual_language == "cuda")) { + LOG("Source language is \"{}\"; not compiling preprocessed code", + args_info.actual_language); config.set_run_second_cpp(true); } @@ -1056,12 +1071,6 @@ process_args(Context& ctx) } if (args_info.seen_split_dwarf) { - size_t pos = args_info.output_obj.rfind('.'); - if (pos == std::string::npos || pos == args_info.output_obj.size() - 1) { - LOG_RAW("Badly formed object filename"); - return Statistic::bad_compiler_arguments; - } - args_info.output_dwo = Util::change_extension(args_info.output_obj, ".dwo"); } diff --git a/src/argprocessing.hpp b/src/argprocessing.hpp index c040c44..a8e8f3a 100644 --- a/src/argprocessing.hpp +++ b/src/argprocessing.hpp @@ -19,7 +19,7 @@ #pragma once #include "Args.hpp" -#include "Statistics.hpp" +#include "Statistic.hpp" #include "third_party/nonstd/optional.hpp" @@ -27,10 +27,10 @@ class Context; struct ProcessArgsResult { - ProcessArgsResult(Statistic error); - ProcessArgsResult(const Args& preprocessor_args, - const Args& extra_args_to_hash, - const Args& compiler_args); + ProcessArgsResult(Statistic error_); + ProcessArgsResult(const Args& preprocessor_args_, + const Args& extra_args_to_hash_, + const Args& compiler_args_); // nullopt on success, otherwise the statistics counter that should be // incremented. diff --git a/src/ccache.cpp b/src/ccache.cpp index b7c3013..10724c9 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -1,5 +1,5 @@ // Copyright (C) 2002-2007 Andrew Tridgell -// Copyright (C) 2009-2020 Joel Rosdahl and other contributors +// Copyright (C) 2009-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. 
// @@ -40,6 +40,7 @@ #include "ResultExtractor.hpp" #include "ResultRetriever.hpp" #include "SignalHandler.hpp" +#include "Statistics.hpp" #include "StdMakeUnique.hpp" #include "TemporaryFile.hpp" #include "UmaskScope.hpp" @@ -87,7 +88,7 @@ constexpr const char VERSION_TEXT[] = R"({} version {} Copyright (C) 2002-2007 Andrew Tridgell -Copyright (C) 2009-2020 Joel Rosdahl and other contributors +Copyright (C) 2009-2021 Joel Rosdahl and other contributors See <https://ccache.dev/credits.html> for a complete list of contributors. @@ -119,8 +120,9 @@ Common options: -M, --max-size SIZE set maximum size of cache to SIZE (use 0 for no limit); available suffixes: k, M, G, T (decimal) and Ki, Mi, Gi, Ti (binary); default suffix: G - -X, --recompress LEVEL recompress the cache to LEVEL (integer level or - "uncompressed") + -X, --recompress LEVEL recompress the cache to level LEVEL (integer or + "uncompressed") using the Zstandard algorithm; + see "Cache compression" in the manual for details -o, --set-config KEY=VAL set configuration item KEY to value VAL -x, --show-compression show compression statistics -p, --show-config show current configuration options in @@ -145,7 +147,7 @@ Options for scripting or debugging: --print-stats print statistics counter IDs and corresponding values in machine-parsable format -See also <https://ccache.dev>. +See also the manual on <https://ccache.dev/documentation.html>. )"; // How often (in seconds) to scan $CCACHE_DIR/tmp for left-over temporary @@ -180,6 +182,45 @@ const uint8_t k_max_cache_levels = 4; // stored in the cache changes in a backwards-incompatible way. const char HASH_PREFIX[] = "3"; +namespace { + +// Throw a Failure if ccache did not succeed in getting or putting a result in +// the cache. If `exit_code` is set, just exit with that code directly, +// otherwise execute the real compiler and exit with its exit code. Also updates +// statistics counter `statistic` if it's not `Statistic::none`. 
+class Failure : public std::exception +{ +public: + Failure(Statistic statistic, + nonstd::optional<int> exit_code = nonstd::nullopt); + + nonstd::optional<int> exit_code() const; + Statistic statistic() const; + +private: + Statistic m_statistic; + nonstd::optional<int> m_exit_code; +}; + +inline Failure::Failure(Statistic statistic, nonstd::optional<int> exit_code) + : m_statistic(statistic), m_exit_code(exit_code) +{ +} + +inline nonstd::optional<int> +Failure::exit_code() const +{ + return m_exit_code; +} + +inline Statistic +Failure::statistic() const +{ + return m_statistic; +} + +} // namespace + static void add_prefix(const Context& ctx, Args& args, const std::string& prefix_command) { @@ -231,10 +272,25 @@ clean_up_internal_tempdir(const Config& config) }); } +static std::string +prepare_debug_path(const std::string& debug_dir, + const std::string& output_obj, + string_view suffix) +{ + const std::string prefix = + debug_dir.empty() ? output_obj : debug_dir + Util::real_path(output_obj); + try { + Util::ensure_dir_exists(Util::dir_name(prefix)); + } catch (Error&) { + // Ignore since we can't handle an error in another way in this context. The + // caller takes care of logging when trying to open the path for writing. 
+ } + return FMT("{}.ccache-{}", prefix, suffix); +} + static void init_hash_debug(Context& ctx, Hash& hash, - string_view obj_path, char type, string_view section_name, FILE* debug_text_file) @@ -243,7 +299,8 @@ init_hash_debug(Context& ctx, return; } - std::string path = FMT("{}.ccache-input-{}", obj_path, type); + const auto path = prepare_debug_path( + ctx.config.debug_dir(), ctx.args_info.output_obj, FMT("input-{}", type)); File debug_binary_file(path, "wb"); if (debug_binary_file) { hash.enable_debug(section_name, debug_binary_file.get(), debug_text_file); @@ -291,14 +348,39 @@ guess_compiler(string_view path) } static bool +include_file_too_new(const Context& ctx, + const std::string& path, + const Stat& path_stat) +{ + // The comparison using >= is intentional, due to a possible race between + // starting compilation and writing the include file. See also the notes under + // "Performance" in doc/MANUAL.adoc. + if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_MTIME) + && path_stat.mtime() >= ctx.time_of_compilation) { + LOG("Include file {} too new", path); + return true; + } + + // The same >= logic as above applies to the change time of the file. + if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_CTIME) + && path_stat.ctime() >= ctx.time_of_compilation) { + LOG("Include file {} ctime too new", path); + return true; + } + + return false; +} + +// Returns false if the include file was "too new" and therefore should disable +// the direct mode (or, in the case of a preprocessed header, fall back to just +// running the real compiler), otherwise true. +static bool do_remember_include_file(Context& ctx, std::string path, Hash& cpp_hash, bool system, Hash* depend_mode_hash) { - bool is_pch = false; - if (path.length() >= 2 && path[0] == '<' && path[path.length() - 1] == '>') { // Typically <built-in> or <command-line>. 
return true; @@ -355,26 +437,27 @@ do_remember_include_file(Context& ctx, } } - // The comparison using >= is intentional, due to a possible race between - // starting compilation and writing the include file. See also the notes - // under "Performance" in doc/MANUAL.adoc. - if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_MTIME) - && st.mtime() >= ctx.time_of_compilation) { - LOG("Include file {} too new", path); - return false; - } + const bool is_pch = Util::is_precompiled_header(path); + const bool too_new = include_file_too_new(ctx, path, st); + + if (too_new) { + // Opt out of direct mode because of a race condition. + // + // The race condition consists of these events: + // + // - the preprocessor is run + // - an include file is modified by someone + // - the new include file is hashed by ccache + // - the real compiler is run on the preprocessor's output, which contains + // data from the old header file + // - the wrong object file is stored in the cache. - // The same >= logic as above applies to the change time of the file. - if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_CTIME) - && st.ctime() >= ctx.time_of_compilation) { - LOG("Include file {} ctime too new", path); return false; } // Let's hash the include file content. Hash fhash; - is_pch = Util::is_precompiled_header(path); if (is_pch) { if (ctx.included_pch_file.empty()) { LOG("Detected use of precompiled header: {}", path); @@ -419,20 +502,28 @@ do_remember_include_file(Context& ctx, return true; } +enum class RememberIncludeFileResult { ok, cannot_use_pch }; + // This function hashes an include file and stores the path and hash in // ctx.included_files. If the include file is a PCH, cpp_hash is also updated. 
-static void +static RememberIncludeFileResult remember_include_file(Context& ctx, const std::string& path, Hash& cpp_hash, bool system, Hash* depend_mode_hash) { - if (!do_remember_include_file(ctx, path, cpp_hash, system, depend_mode_hash) - && ctx.config.direct_mode()) { - LOG_RAW("Disabling direct mode"); - ctx.config.set_direct_mode(false); + if (!do_remember_include_file( + ctx, path, cpp_hash, system, depend_mode_hash)) { + if (Util::is_precompiled_header(path)) { + return RememberIncludeFileResult::cannot_use_pch; + } else if (ctx.config.direct_mode()) { + LOG_RAW("Disabling direct mode"); + ctx.config.set_direct_mode(false); + } } + + return RememberIncludeFileResult::ok; } static void @@ -449,7 +540,10 @@ print_included_files(const Context& ctx, FILE* fp) // - Makes include file paths for which the base directory is a prefix relative // when computing the hash sum. // - Stores the paths and hashes of included files in ctx.included_files. -static bool +// +// Returns Statistic::none on success, otherwise a statistics counter to be +// incremented. +static Statistic process_preprocessed_file(Context& ctx, Hash& hash, const std::string& path, @@ -459,7 +553,7 @@ process_preprocessed_file(Context& ctx, try { data = Util::read_file(path); } catch (Error&) { - return false; + return Statistic::internal_error; } // Bytes between p and q are pending to be hashed. 
@@ -540,7 +634,7 @@ process_preprocessed_file(Context& ctx, q++; if (q >= end) { LOG_RAW("Failed to parse included file path"); - return false; + return Statistic::internal_error; } // q points to the beginning of an include file path hash.hash(p, q - p); @@ -582,7 +676,10 @@ process_preprocessed_file(Context& ctx, hash.hash(inc_path); } - remember_include_file(ctx, inc_path, hash, system, nullptr); + if (remember_include_file(ctx, inc_path, hash, system, nullptr) + == RememberIncludeFileResult::cannot_use_pch) { + return Statistic::could_not_use_precompiled_header; + } p = q; // Everything of interest between p and q has been hashed now. } else if (q[0] == '.' && q[1] == 'i' && q[2] == 'n' && q[3] == 'c' && q[4] == 'b' && q[5] == 'i' && q[6] == 'n') { @@ -627,7 +724,7 @@ process_preprocessed_file(Context& ctx, print_included_files(ctx, stdout); } - return true; + return Statistic::none; } // Extract the used includes from the dependency file. Note that we cannot @@ -1080,7 +1177,12 @@ get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash) TemporaryFile tmp_stdout( FMT("{}/tmp.cpp_stdout", ctx.config.temporary_dir())); - stdout_path = tmp_stdout.path; + ctx.register_pending_tmp_file(tmp_stdout.path); + + // stdout_path needs the proper cpp_extension for the compiler to do its + // thing correctly. 
+ stdout_path = FMT("{}.{}", tmp_stdout.path, ctx.config.cpp_extension()); + Util::hard_link(tmp_stdout.path, stdout_path); ctx.register_pending_tmp_file(stdout_path); TemporaryFile tmp_stderr( @@ -1115,9 +1217,11 @@ get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash) } hash.hash_delimiter("cpp"); - bool is_pump = ctx.config.compiler_type() == CompilerType::pump; - if (!process_preprocessed_file(ctx, hash, stdout_path, is_pump)) { - throw Failure(Statistic::internal_error); + const bool is_pump = ctx.config.compiler_type() == CompilerType::pump; + const Statistic error = + process_preprocessed_file(ctx, hash, stdout_path, is_pump); + if (error != Statistic::none) { + throw Failure(error); } hash.hash_delimiter("cppstderr"); @@ -1130,11 +1234,7 @@ get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash) if (ctx.args_info.direct_i_file) { ctx.i_tmpfile = ctx.args_info.input_file; } else { - // i_tmpfile needs the proper cpp_extension for the compiler to do its - // thing correctly - ctx.i_tmpfile = FMT("{}.{}", stdout_path, ctx.config.cpp_extension()); - Util::rename(stdout_path, ctx.i_tmpfile); - ctx.register_pending_tmp_file(ctx.i_tmpfile); + ctx.i_tmpfile = stdout_path; } if (!ctx.config.run_second_cpp()) { @@ -1275,7 +1375,7 @@ hash_common_info(const Context& ctx, "COMPILER_PATH", "GCC_COMPARE_DEBUG", "GCC_EXEC_PREFIX", - "SOURCE_DATE_EPOCH", + // Note: SOURCE_DATE_EPOCH is handled in hash_source_code_string(). }; for (const char* name : always_hash_env_vars) { const char* value = getenv(name); @@ -2170,8 +2270,8 @@ finalize_at_exit(Context& ctx) // Dump log buffer last to not lose any logs. 
if (ctx.config.debug() && !ctx.args_info.output_obj.empty()) { - const auto path = FMT("{}.ccache-log", ctx.args_info.output_obj); - Logging::dump_log(path); + Logging::dump_log(prepare_debug_path( + ctx.config.debug_dir(), ctx.args_info.output_obj, "log")); } } @@ -2319,7 +2419,8 @@ do_cache_compilation(Context& ctx, const char* const* argv) MTR_META_THREAD_NAME(ctx.args_info.output_obj.c_str()); if (ctx.config.debug()) { - std::string path = FMT("{}.ccache-input-text", ctx.args_info.output_obj); + const auto path = prepare_debug_path( + ctx.config.debug_dir(), ctx.args_info.output_obj, "input-text"); File debug_text_file(path, "w"); if (debug_text_file) { ctx.hash_debug_files.push_back(std::move(debug_text_file)); @@ -2333,8 +2434,7 @@ do_cache_compilation(Context& ctx, const char* const* argv) : nullptr; Hash common_hash; - init_hash_debug( - ctx, common_hash, ctx.args_info.output_obj, 'c', "COMMON", debug_text_file); + init_hash_debug(ctx, common_hash, 'c', "COMMON", debug_text_file); MTR_BEGIN("hash", "common_hash"); hash_common_info( @@ -2343,12 +2443,7 @@ do_cache_compilation(Context& ctx, const char* const* argv) // Try to find the hash using the manifest. Hash direct_hash = common_hash; - init_hash_debug(ctx, - direct_hash, - ctx.args_info.output_obj, - 'd', - "DIRECT MODE", - debug_text_file); + init_hash_debug(ctx, direct_hash, 'd', "DIRECT MODE", debug_text_file); Args args_to_hash = processed.preprocessor_args; args_to_hash.push_back(processed.extra_args_to_hash); @@ -2392,12 +2487,7 @@ do_cache_compilation(Context& ctx, const char* const* argv) // Find the hash using the preprocessed output. Also updates // ctx.included_files. 
Hash cpp_hash = common_hash; - init_hash_debug(ctx, - cpp_hash, - ctx.args_info.output_obj, - 'p', - "PREPROCESSOR MODE", - debug_text_file); + init_hash_debug(ctx, cpp_hash, 'p', "PREPROCESSOR MODE", debug_text_file); MTR_BEGIN("hash", "cpp_hash"); result_name = calculate_result_name( diff --git a/src/ccache.hpp b/src/ccache.hpp index 7f833ee..bf34cb0 100644 --- a/src/ccache.hpp +++ b/src/ccache.hpp @@ -32,26 +32,6 @@ class Context; extern const char CCACHE_VERSION[]; -const uint32_t SLOPPY_INCLUDE_FILE_MTIME = 1 << 0; -const uint32_t SLOPPY_INCLUDE_FILE_CTIME = 1 << 1; -const uint32_t SLOPPY_TIME_MACROS = 1 << 2; -const uint32_t SLOPPY_PCH_DEFINES = 1 << 3; -// Allow us to match files based on their stats (size, mtime, ctime), without -// looking at their contents. -const uint32_t SLOPPY_FILE_STAT_MATCHES = 1 << 4; -// Allow us to not include any system headers in the manifest include files, -// similar to -MM versus -M for dependencies. -const uint32_t SLOPPY_SYSTEM_HEADERS = 1 << 5; -// Allow us to ignore ctimes when comparing file stats, so we can fake mtimes -// if we want to (it is much harder to fake ctimes, requires changing clock) -const uint32_t SLOPPY_FILE_STAT_MATCHES_CTIME = 1 << 6; -// Allow us to not include the -index-store-path option in the manifest hash. -const uint32_t SLOPPY_CLANG_INDEX_STORE = 1 << 7; -// Ignore locale settings. -const uint32_t SLOPPY_LOCALE = 1 << 8; -// Allow caching even if -fmodules is used. 
-const uint32_t SLOPPY_MODULES = 1 << 9; - using FindExecutableFunction = std::function<std::string(const Context& ctx, const std::string& name, diff --git a/src/cleanup.cpp b/src/cleanup.cpp index 6d91d77..5c76ebb 100644 --- a/src/cleanup.cpp +++ b/src/cleanup.cpp @@ -23,6 +23,7 @@ #include "Config.hpp" #include "Context.hpp" #include "Logging.hpp" +#include "Statistics.hpp" #include "Util.hpp" #ifdef INODE_CACHE_SUPPORTED @@ -90,9 +91,8 @@ clean_up_dir(const std::string& subdir, { LOG("Cleaning up cache directory {}", subdir); - std::vector<std::shared_ptr<CacheFile>> files; - Util::get_level_1_files( - subdir, [&](double progress) { progress_receiver(progress / 3); }, files); + std::vector<CacheFile> files = Util::get_level_1_files( + subdir, [&](double progress) { progress_receiver(progress / 3); }); uint64_t cache_size = 0; uint64_t files_in_cache = 0; @@ -102,29 +102,27 @@ clean_up_dir(const std::string& subdir, ++i, progress_receiver(1.0 / 3 + 1.0 * i / files.size() / 3)) { const auto& file = files[i]; - if (!file->lstat().is_regular()) { + if (!file.lstat().is_regular()) { // Not a file or missing file. continue; } // Delete any tmp files older than 1 hour right away. - if (file->lstat().mtime() + 3600 < current_time - && Util::base_name(file->path()).find(".tmp.") != std::string::npos) { - Util::unlink_tmp(file->path()); + if (file.lstat().mtime() + 3600 < current_time + && Util::base_name(file.path()).find(".tmp.") != std::string::npos) { + Util::unlink_tmp(file.path()); continue; } - cache_size += file->lstat().size_on_disk(); + cache_size += file.lstat().size_on_disk(); files_in_cache += 1; } // Sort according to modification time, oldest first. 
- std::sort(files.begin(), - files.end(), - [](const std::shared_ptr<CacheFile>& f1, - const std::shared_ptr<CacheFile>& f2) { - return f1->lstat().mtime() < f2->lstat().mtime(); - }); + std::sort( + files.begin(), files.end(), [](const CacheFile& f1, const CacheFile& f2) { + return f1.lstat().mtime() < f2.lstat().mtime(); + }); LOG("Before cleanup: {:.0f} KiB, {:.0f} files", static_cast<double>(cache_size) / 1024, @@ -135,27 +133,26 @@ clean_up_dir(const std::string& subdir, ++i, progress_receiver(2.0 / 3 + 1.0 * i / files.size() / 3)) { const auto& file = files[i]; - if (!file->lstat() || file->lstat().is_directory()) { + if (!file.lstat() || file.lstat().is_directory()) { continue; } if ((max_size == 0 || cache_size <= max_size) && (max_files == 0 || files_in_cache <= max_files) && (max_age == 0 - || file->lstat().mtime() + || file.lstat().mtime() > (current_time - static_cast<int64_t>(max_age)))) { break; } - if (Util::ends_with(file->path(), ".stderr")) { + if (Util::ends_with(file.path(), ".stderr")) { // In order to be nice to legacy ccache versions, make sure that the .o // file is deleted before .stderr, because if the ccache process gets // killed after deleting the .stderr but before deleting the .o, the // cached result will be inconsistent. (.stderr is the only file that is // optional for legacy ccache versions; any other file missing from the // cache will be detected.) - std::string o_file = - file->path().substr(0, file->path().size() - 6) + "o"; + std::string o_file = file.path().substr(0, file.path().size() - 6) + "o"; // Don't subtract this extra deletion from the cache size; that // bookkeeping will be done when the loop reaches the .o file. 
If the @@ -167,7 +164,7 @@ clean_up_dir(const std::string& subdir, } delete_file( - file->path(), file->lstat().size_on_disk(), &cache_size, &files_in_cache); + file.path(), file.lstat().size_on_disk(), &cache_size, &files_in_cache); cleaned = true; } @@ -207,12 +204,11 @@ wipe_dir(const std::string& subdir, { LOG("Clearing out cache directory {}", subdir); - std::vector<std::shared_ptr<CacheFile>> files; - Util::get_level_1_files( - subdir, [&](double progress) { progress_receiver(progress / 2); }, files); + const std::vector<CacheFile> files = Util::get_level_1_files( + subdir, [&](double progress) { progress_receiver(progress / 2); }); for (size_t i = 0; i < files.size(); ++i) { - Util::unlink_safe(files[i]->path()); + Util::unlink_safe(files[i].path()); progress_receiver(0.5 + 0.5 * i / files.size()); } diff --git a/src/compress.cpp b/src/compress.cpp index 42e0179..1164b79 100644 --- a/src/compress.cpp +++ b/src/compress.cpp @@ -221,23 +221,20 @@ compress_stats(const Config& config, config.cache_dir(), [&](const std::string& subdir, const Util::ProgressReceiver& sub_progress_receiver) { - std::vector<std::shared_ptr<CacheFile>> files; - Util::get_level_1_files( - subdir, - [&](double progress) { sub_progress_receiver(progress / 2); }, - files); + const std::vector<CacheFile> files = Util::get_level_1_files( + subdir, [&](double progress) { sub_progress_receiver(progress / 2); }); for (size_t i = 0; i < files.size(); ++i) { const auto& cache_file = files[i]; - on_disk_size += cache_file->lstat().size_on_disk(); + on_disk_size += cache_file.lstat().size_on_disk(); try { - auto file = open_file(cache_file->path(), "rb"); - auto reader = create_reader(*cache_file, file.get()); - compr_size += cache_file->lstat().size(); + auto file = open_file(cache_file.path(), "rb"); + auto reader = create_reader(cache_file, file.get()); + compr_size += cache_file.lstat().size(); content_size += reader->content_size(); } catch (Error&) { - incompr_size += 
cache_file->lstat().size(); + incompr_size += cache_file.lstat().size(); } sub_progress_receiver(1.0 / 2 + 1.0 * i / files.size() / 2); @@ -290,27 +287,26 @@ compress_recompress(Context& ctx, ctx.config.cache_dir(), [&](const std::string& subdir, const Util::ProgressReceiver& sub_progress_receiver) { - std::vector<std::shared_ptr<CacheFile>> files; - Util::get_level_1_files( - subdir, - [&](double progress) { sub_progress_receiver(0.1 * progress); }, - files); + std::vector<CacheFile> files = + Util::get_level_1_files(subdir, [&](double progress) { + sub_progress_receiver(0.1 * progress); + }); auto stats_file = subdir + "/stats"; for (size_t i = 0; i < files.size(); ++i) { const auto& file = files[i]; - if (file->type() != CacheFile::Type::unknown) { + if (file.type() != CacheFile::Type::unknown) { thread_pool.enqueue([&statistics, stats_file, file, level] { try { - recompress_file(statistics, stats_file, *file, level); + recompress_file(statistics, stats_file, file, level); } catch (Error&) { // Ignore for now. } }); } else { - statistics.update(0, 0, 0, file->lstat().size()); + statistics.update(0, 0, 0, file.lstat().size()); } sub_progress_receiver(0.1 + 0.9 * i / files.size()); diff --git a/src/exceptions.hpp b/src/exceptions.hpp index 1c73141..f35f50c 100644 --- a/src/exceptions.hpp +++ b/src/exceptions.hpp @@ -21,7 +21,6 @@ #include "system.hpp" #include "FormatNonstdStringView.hpp" -#include "Statistics.hpp" #include "third_party/fmt/core.h" #include "third_party/nonstd/optional.hpp" @@ -80,38 +79,3 @@ inline Fatal::Fatal(T&&... args) : ErrorBase(fmt::format(std::forward<T>(args)...)) { } - -// Throw a Failure if ccache did not succeed in getting or putting a result in -// the cache. If `exit_code` is set, just exit with that code directly, -// otherwise execute the real compiler and exit with its exit code. Also updates -// statistics counter `statistic` if it's not `Statistic::none`. 
-class Failure : public std::exception -{ -public: - Failure(Statistic statistic, - nonstd::optional<int> exit_code = nonstd::nullopt); - - nonstd::optional<int> exit_code() const; - Statistic statistic() const; - -private: - Statistic m_statistic; - nonstd::optional<int> m_exit_code; -}; - -inline Failure::Failure(Statistic statistic, nonstd::optional<int> exit_code) - : m_statistic(statistic), m_exit_code(exit_code) -{ -} - -inline nonstd::optional<int> -Failure::exit_code() const -{ - return m_exit_code; -} - -inline Statistic -Failure::statistic() const -{ - return m_statistic; -} diff --git a/src/hashutil.cpp b/src/hashutil.cpp index 072d821..7378c02 100644 --- a/src/hashutil.cpp +++ b/src/hashutil.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2009-2020 Joel Rosdahl and other contributors +// Copyright (C) 2009-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -23,8 +23,8 @@ #include "Context.hpp" #include "Hash.hpp" #include "Logging.hpp" +#include "Sloppiness.hpp" #include "Stat.hpp" -#include "ccache.hpp" #include "execute.hpp" #include "fmtmacros.hpp" #include "macroskip.hpp" @@ -245,6 +245,15 @@ hash_source_code_string(const Context& ctx, hash.hash(now->tm_year); hash.hash(now->tm_mon); hash.hash(now->tm_mday); + + // If the compiler has support for it, the expansion of __DATE__ will change + // according to the value of SOURCE_DATE_EPOCH. Note: We have to hash both + // SOURCE_DATE_EPOCH and the current date since we can't be sure that the + // compiler honors SOURCE_DATE_EPOCH. + const auto source_date_epoch = getenv("SOURCE_DATE_EPOCH"); + if (source_date_epoch) { + hash.hash(source_date_epoch); + } } if (result & HASH_SOURCE_CODE_FOUND_TIME) { // We don't know for sure that the program actually uses the __TIME__ macro, @@ -254,6 +263,7 @@ hash_source_code_string(const Context& ctx, // __TIME__ has been found so that the direct mode can be disabled. 
LOG("Found __TIME__ in {}", path); } + if (result & HASH_SOURCE_CODE_FOUND_TIMESTAMP) { LOG("Found __TIMESTAMP__ in {}", path); diff --git a/src/language.cpp b/src/language.cpp index 70325ea..aa1a2ca 100644 --- a/src/language.cpp +++ b/src/language.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2010-2020 Joel Rosdahl and other contributors +// Copyright (C) 2010-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -67,7 +67,7 @@ const struct {".HXX", "c++-header"}, {".tcc", "c++-header"}, {".TCC", "c++-header"}, - {".cu", "cu"}, + {".cu", "cu"}, // Special case in language_for_file: "cuda" for Clang {".hip", "hip"}, {nullptr, nullptr}, }; @@ -84,7 +84,8 @@ const struct {"c++", "c++-cpp-output"}, {"c++-cpp-output", "c++-cpp-output"}, {"c++-header", "c++-cpp-output"}, - {"cu", "cpp-output"}, + {"cu", "cpp-output"}, // NVCC + {"cuda", "cpp-output"}, // Clang {"hip", "cpp-output"}, {"objective-c", "objective-c-cpp-output"}, {"objective-c-header", "objective-c-cpp-output"}, @@ -101,10 +102,26 @@ const struct } // namespace +bool +supported_source_extension(const std::string& fname) +{ + const auto ext = Util::get_extension(fname); + for (size_t i = 0; k_ext_lang_table[i].extension; ++i) { + if (k_ext_lang_table[i].extension == ext) { + return true; + } + } + return false; +} + std::string -language_for_file(const std::string& fname) +language_for_file(const std::string& fname, CompilerType compiler_type) { auto ext = Util::get_extension(fname); + if (ext == ".cu" && compiler_type == CompilerType::clang) { + // Special case: Clang maps .cu to cuda. 
+ return "cuda"; + } for (size_t i = 0; k_ext_lang_table[i].extension; ++i) { if (k_ext_lang_table[i].extension == ext) { return k_ext_lang_table[i].language; diff --git a/src/language.hpp b/src/language.hpp index 69f7f26..99bf386 100644 --- a/src/language.hpp +++ b/src/language.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2010-2020 Joel Rosdahl and other contributors +// Copyright (C) 2010-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -20,11 +20,17 @@ #include "system.hpp" +#include "Config.hpp" + #include <string> -// Guess the language of `fname` based on its extension. Returns the empty -// string if the extension is unknown. -std::string language_for_file(const std::string& fname); +// Return whether a filename has a supported source code extension. +bool supported_source_extension(const std::string& fname); + +// Guess the language of `fname` based on its extension and a compiler type. +// Returns the empty string if the extension is unknown. +std::string language_for_file(const std::string& fname, + CompilerType compiler_type); // Return the preprocessed language for `language`, or the empty string if // unknown. diff --git a/src/system.hpp b/src/system.hpp index 79d07ef..ae4ca52 100644 --- a/src/system.hpp +++ b/src/system.hpp @@ -139,7 +139,6 @@ const mode_t S_IWUSR = mode_t(_S_IWRITE); # define NOMINMAX 1 # include <windows.h> # define mkdir(a, b) _mkdir(a) -# define link(src, dst) (CreateHardLink(dst, src, nullptr) ? 
0 : -1) # define execv(a, b) win32execute(a, b, 0, -1, -1) # define strncasecmp _strnicmp # define strcasecmp _stricmp @@ -172,7 +171,7 @@ DLLIMPORT extern char** environ; # define O_BINARY 0 #endif -#ifdef HAVE_SYS_MMAN_H +#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_PTHREAD_MUTEXATTR_SETPSHARED) # define INODE_CACHE_SUPPORTED #endif diff --git a/src/third_party/CMakeLists.txt b/src/third_party/CMakeLists.txt index d40110a..ed0ff9e 100644 --- a/src/third_party/CMakeLists.txt +++ b/src/third_party/CMakeLists.txt @@ -6,6 +6,10 @@ else() target_compile_definitions(third_party_lib PUBLIC -DSTATIC_GETOPT) endif() +if(WIN32) + target_sources(third_party_lib PRIVATE win32/mktemp.c) +endif () + if(ENABLE_TRACING) target_sources(third_party_lib PRIVATE minitrace.c) endif() @@ -36,7 +40,7 @@ endif() # Treat third party headers as system files (no warning for those headers). target_include_directories( third_party_lib - PRIVATE ${CMAKE_BINARY_DIR} . SYSTEM) + PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} SYSTEM) target_link_libraries(third_party_lib PRIVATE standard_settings) target_link_libraries(third_party_lib INTERFACE blake3) diff --git a/src/third_party/blake3/CMakeLists.txt b/src/third_party/blake3/CMakeLists.txt index a75e561..581ee81 100644 --- a/src/third_party/blake3/CMakeLists.txt +++ b/src/third_party/blake3/CMakeLists.txt @@ -2,50 +2,116 @@ add_library(blake3 STATIC blake3.c blake3_dispatch_ccache.c blake3_portable.c) target_link_libraries(blake3 PRIVATE standard_settings) -if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SIZEOF_VOID_P EQUAL 8 - AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)) - set(blake_source_type asm) - set(blake_suffix "_x86-64_unix.S") -else() - set(blake_source_type c) - set(blake_suffix ".c") +if(MSVC) + # No object file is created if masm is passed the compile options from standard_settings, + # so don't pass any flags at all to assembler (as no flags are needed anyway). 
+ string(REPLACE "<FLAGS> " "" CMAKE_ASM_MASM_COMPILE_OBJECT "${CMAKE_ASM_MASM_COMPILE_OBJECT}") endif() -include(CheckAsmCompilerFlag) -include(CheckCCompilerFlag) - -function(add_source_if_enabled feature compile_flags) - string(TOUPPER "have_${blake_source_type}_${feature}" have_feature) - - # AVX512 support fails to compile with old Apple Clang versions even though - # the compiler accepts the -m flags. - if(${feature} STREQUAL "avx512" - AND CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) - message(STATUS "Detected unsupported compiler for ${have_feature} - disabled") - set(${have_feature} FALSE) - elseif(${blake_source_type} STREQUAL "asm") - check_asm_compiler_flag(${compile_flags} ${have_feature}) +include(CheckCSourceCompiles) + +function(add_source_if_enabled feature msvc_flags others_flags intrinsic) + if(MSVC) + set(compile_flags "${msvc_flags}") else() - check_c_compiler_flag(${compile_flags} ${have_feature}) + set(compile_flags "${others_flags}") + endif() + + # First check if it's possible to use the assembler variant for the feature. 
+ string(TOUPPER "have_asm_${feature}" have_feature) + if(NOT DEFINED "${have_feature}" AND CMAKE_SIZEOF_VOID_P EQUAL 8) + if(MSVC) + set(suffix "_x86-64_windows_msvc.asm") + elseif(WIN32) + set(suffix "_x86-64_windows_gnu.S") + else() + set(suffix "_x86-64_unix.S") + endif() + + if(NOT CMAKE_REQUIRED_QUIET) + message(STATUS "Performing Test ${have_feature}") + endif() + + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + + # Must set CMAKE_ASM_MASM_CREATE_STATIC_LIBRARY explicitly otherwise try_compile + # fails, see https://discourse.cmake.org/t/building-lib-file-from-asm-cmake-bug/1959 + try_compile( + ${have_feature} + ${CMAKE_CURRENT_BINARY_DIR} + "${CMAKE_CURRENT_SOURCE_DIR}/blake3_${feature}${suffix}" + CMAKE_FLAGS -DCMAKE_ASM_MASM_CREATE_STATIC_LIBRARY=${CMAKE_C_CREATE_STATIC_LIBRARY} + COMPILE_DEFINITIONS ${compile_flags}) + + unset(CMAKE_TRY_COMPILE_TARGET_TYPE) + + if(NOT CMAKE_REQUIRED_QUIET) + if (${${have_feature}}) + message(STATUS "Performing Test ${have_feature} - Success") + else() + message(STATUS "Performing Test ${have_feature} - Failed") + endif() + endif() + endif() + + # If the assembler variant didn't work, try the c variant. + if(NOT ${have_feature}) + string(TOUPPER "have_c_${feature}" have_feature) + set(suffix ".c") + + set(CMAKE_REQUIRED_FLAGS ${compile_flags}) + check_c_source_compiles( + [=[ + #include <immintrin.h> + int main() { ${intrinsic}; return 0; } + ]=] + ${have_feature}) + unset(CMAKE_REQUIRED_FLAGS) endif() if(${have_feature}) - target_sources(blake3 PRIVATE blake3_${feature}${blake_suffix}) - set_property( - SOURCE blake3_${feature}${blake_suffix} - APPEND PROPERTY COMPILE_FLAGS ${compile_flags}) + target_sources(blake3 PRIVATE blake3_${feature}${suffix}) + if(suffix STREQUAL ".c") + if(MINGW AND feature STREQUAL "avx512") + # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782. + # Taken from blake3's build.rs. 
+ set(compile_flags "${compile_flags} -fno-asynchronous-unwind-tables") + endif() + set_property( + SOURCE blake3_${feature}${suffix} + APPEND PROPERTY COMPILE_FLAGS ${compile_flags}) + elseif(NOT MSVC) + set_property( + SOURCE blake3_${feature}${suffix} + PROPERTY COMPILE_FLAGS ${compile_flags}) + endif() else() string(TOUPPER "blake3_no_${feature}" no_feature) target_compile_definitions(blake3 PRIVATE ${no_feature}) endif() endfunction() -add_source_if_enabled(sse2 "-msse2") -add_source_if_enabled(sse41 "-msse4.1") -add_source_if_enabled(avx2 "-mavx2") -add_source_if_enabled(avx512 "-mavx512f -mavx512vl") +# https://software.intel.com/sites/landingpage/IntrinsicsGuide/ +add_source_if_enabled(sse2 "" "-msse2" + "_mm_set1_epi32(42)") +add_source_if_enabled(sse41 "" "-msse4.1" + "_mm_test_all_ones(_mm_set1_epi32(42))") +add_source_if_enabled(avx2 "/arch:AVX2" "-mavx2" + "_mm256_abs_epi8(_mm256_set1_epi32(42))") +add_source_if_enabled(avx512 "/arch:AVX512" "-mavx512f -mavx512vl" + "_mm256_abs_epi64(_mm256_set1_epi32(42))") -# TODO: how to detect ARM NEON support? 
-# If NEON, define BLAKE3_USE_NEON and build blake3_neon.c +# Neon is always available on AArch64 +if(CMAKE_SIZEOF_VOID_P EQUAL 8) + # https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics + check_c_source_compiles( + [=[ + #include <arm_neon.h> + int main() { vdupq_n_s32(42); return 0; } + ]=] + HAVE_NEON) + if(HAVE_NEON) + target_sources(blake3 PRIVATE blake3_neon.c) + target_compile_definitions(blake3 PRIVATE BLAKE3_USE_NEON) + endif() +endif() diff --git a/src/third_party/blake3/blake3.c b/src/third_party/blake3/blake3.c index 741a76d..7abf532 100644 --- a/src/third_party/blake3/blake3.c +++ b/src/third_party/blake3/blake3.c @@ -5,6 +5,10 @@ #include "blake3.h" #include "blake3_impl.h" +const char * blake3_version(void) { + return BLAKE3_VERSION_STRING; +} + INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8], uint8_t flags) { memcpy(self->cv, key, BLAKE3_KEY_LEN); diff --git a/src/third_party/blake3/blake3.h b/src/third_party/blake3/blake3.h index 51f1d2a..57ebd5a 100644 --- a/src/third_party/blake3/blake3.h +++ b/src/third_party/blake3/blake3.h @@ -8,6 +8,7 @@ extern "C" { #endif +#define BLAKE3_VERSION_STRING "0.3.7" #define BLAKE3_KEY_LEN 32 #define BLAKE3_OUT_LEN 32 #define BLAKE3_BLOCK_LEN 64 @@ -38,6 +39,7 @@ typedef struct { uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; } blake3_hasher; +const char * blake3_version(void); void blake3_hasher_init(blake3_hasher *self); void blake3_hasher_init_keyed(blake3_hasher *self, const uint8_t key[BLAKE3_KEY_LEN]); diff --git a/src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm new file mode 100644 index 0000000..352298e --- /dev/null +++ b/src/third_party/blake3/blake3_avx2_x86-64_windows_msvc.asm @@ -0,0 +1,1828 @@ +public _blake3_hash_many_avx2 +public blake3_hash_many_avx2 + +_TEXT SEGMENT ALIGN(16) 'CODE' + +ALIGN 16 +blake3_hash_many_avx2 PROC +_blake3_hash_many_avx2 PROC + 
push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 880 + and rsp, 0FFFFFFFFFFFFFFC0H + vmovdqa xmmword ptr [rsp+2D0H], xmm6 + vmovdqa xmmword ptr [rsp+2E0H], xmm7 + vmovdqa xmmword ptr [rsp+2F0H], xmm8 + vmovdqa xmmword ptr [rsp+300H], xmm9 + vmovdqa xmmword ptr [rsp+310H], xmm10 + vmovdqa xmmword ptr [rsp+320H], xmm11 + vmovdqa xmmword ptr [rsp+330H], xmm12 + vmovdqa xmmword ptr [rsp+340H], xmm13 + vmovdqa xmmword ptr [rsp+350H], xmm14 + vmovdqa xmmword ptr [rsp+360H], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+68H] + movzx r9, byte ptr [rbp+70H] + neg r9d + vmovd xmm0, r9d + vpbroadcastd ymm0, xmm0 + vmovdqa ymmword ptr [rsp+260H], ymm0 + vpand ymm1, ymm0, ymmword ptr [ADD0] + vpand ymm2, ymm0, ymmword ptr [ADD1] + vmovdqa ymmword ptr [rsp+2A0H], ymm2 + vmovd xmm2, r8d + vpbroadcastd ymm2, xmm2 + vpaddd ymm2, ymm2, ymm1 + vmovdqa ymmword ptr [rsp+220H], ymm2 + vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK] + vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK] + vpcmpgtd ymm2, ymm1, ymm2 + shr r8, 32 + vmovd xmm3, r8d + vpbroadcastd ymm3, xmm3 + vpsubd ymm3, ymm3, ymm2 + vmovdqa ymmword ptr [rsp+240H], ymm3 + shl rdx, 6 + mov qword ptr [rsp+2C0H], rdx + cmp rsi, 8 + jc final7blocks +outerloop8: + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+4H] + vpbroadcastd ymm2, dword ptr [rcx+8H] + vpbroadcastd ymm3, dword ptr [rcx+0CH] + vpbroadcastd ymm4, dword ptr [rcx+10H] + vpbroadcastd ymm5, dword ptr [rcx+14H] + vpbroadcastd ymm6, dword ptr [rcx+18H] + vpbroadcastd ymm7, dword ptr [rcx+1CH] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+20H] + mov r13, qword ptr [rdi+28H] + mov r14, qword ptr [rdi+30H] + mov r15, qword ptr [rdi+38H] + movzx eax, byte ptr [rbp+78H] + movzx ebx, byte ptr [rbp+80H] + or eax, ebx + xor edx, edx +ALIGN 16 +innerloop8: + movzx 
ebx, byte ptr [rbp+88H] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+2C0H] + cmove eax, ebx + mov dword ptr [rsp+200H], eax + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-40H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-40H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-40H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+20H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+40H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+60H], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-30H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-30H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-30H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+80H], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0A0H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0C0H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0E0H], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-20H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H + vunpcklpd ymm12, ymm8, ymm9 + 
vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-20H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-20H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+100H], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+120H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+140H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+160H], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-10H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-10H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-10H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+180H], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+1A0H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+1C0H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+1E0H], ymm11 + vpbroadcastd ymm15, dword ptr [rsp+200H] + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r12+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r13+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r14+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + prefetcht0 byte ptr [r15+rdx+80H] + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+40H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+80H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd 
ymm3, ymm3, ymm7 + vpxor ymm12, ymm0, ymmword ptr [rsp+220H] + vpxor ymm13, ymm1, ymmword ptr [rsp+240H] + vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN] + vpxor ymm15, ymm3, ymm15 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0] + vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1] + vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2] + vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3] + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+20H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+60H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + 
vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+100H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+180H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+120H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + 
vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+40H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+60H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0E0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+80H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + 
vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+20H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+120H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+160H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, 
xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+60H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+80H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+40H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H] + vpaddd ymm0, ymm0, ymm4 + 
vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+120H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+160H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, 
ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0A0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+20H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+140H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld 
ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+120H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+60H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+80H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+20H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, 
ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+40H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+100H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+180H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+120H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 
+ vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+140H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + 
vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+40H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+60H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+20H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+80H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+120H] + vpaddd ymm1, ymm1, ymmword ptr 
[rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+100H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+180H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+20H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld 
ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+40H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+80H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+60H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0C0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, 
ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+160H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+20H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1E0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+120H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, 
ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+40H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+60H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+140H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+80H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, 
ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+78H] + jne innerloop8 + mov rbx, qword ptr [rbp+90H] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0CCH + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0CCH + vblendps ymm3, ymm12, ymm9, 0CCH + vperm2f128 ymm12, ymm1, ymm2, 20H + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0CCH + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 20H + vmovups ymmword ptr [rbx+20H], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0CCH + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0CCH + vblendps ymm14, ymm14, ymm13, 0CCH + vperm2f128 ymm8, ymm10, ymm14, 20H + vmovups ymmword ptr [rbx+40H], ymm8 + vblendps ymm15, ymm13, ymm15, 0CCH + vperm2f128 ymm13, ymm6, ymm15, 20H + vmovups ymmword ptr [rbx+60H], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 31H + vperm2f128 ymm11, ymm3, 
ymm4, 31H + vmovups ymmword ptr [rbx+80H], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 31H + vperm2f128 ymm15, ymm6, ymm15, 31H + vmovups ymmword ptr [rbx+0A0H], ymm11 + vmovups ymmword ptr [rbx+0C0H], ymm14 + vmovups ymmword ptr [rbx+0E0H], ymm15 + vmovdqa ymm0, ymmword ptr [rsp+2A0H] + vpaddd ymm1, ymm0, ymmword ptr [rsp+220H] + vmovdqa ymmword ptr [rsp+220H], ymm1 + vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK] + vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK] + vpcmpgtd ymm2, ymm0, ymm2 + vmovdqa ymm0, ymmword ptr [rsp+240H] + vpsubd ymm2, ymm0, ymm2 + vmovdqa ymmword ptr [rsp+240H], ymm2 + add rdi, 64 + add rbx, 256 + mov qword ptr [rbp+90H], rbx + sub rsi, 8 + cmp rsi, 8 + jnc outerloop8 + test rsi, rsi + jnz final7blocks +unwind: + vzeroupper + vmovdqa xmm6, xmmword ptr [rsp+2D0H] + vmovdqa xmm7, xmmword ptr [rsp+2E0H] + vmovdqa xmm8, xmmword ptr [rsp+2F0H] + vmovdqa xmm9, xmmword ptr [rsp+300H] + vmovdqa xmm10, xmmword ptr [rsp+310H] + vmovdqa xmm11, xmmword ptr [rsp+320H] + vmovdqa xmm12, xmmword ptr [rsp+330H] + vmovdqa xmm13, xmmword ptr [rsp+340H] + vmovdqa xmm14, xmmword ptr [rsp+350H] + vmovdqa xmm15, xmmword ptr [rsp+360H] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final7blocks: + mov rbx, qword ptr [rbp+90H] + mov r15, qword ptr [rsp+2C0H] + movzx r13d, byte ptr [rbp+78H] + movzx r12d, byte ptr [rbp+88H] + test rsi, 4H + je final3blocks + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+10H] + vmovdqa ymm8, ymm0 + vmovdqa ymm9, ymm1 + vbroadcasti128 ymm12, xmmword ptr [rsp+220H] + vbroadcasti128 ymm13, xmmword ptr [rsp+240H] + vpunpckldq ymm14, ymm12, ymm13 + vpunpckhdq ymm15, ymm12, ymm13 + vpermq ymm14, ymm14, 50H + vpermq ymm15, ymm15, 50H + vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN] + vpblendd ymm14, ymm14, ymm12, 44H + vpblendd ymm15, ymm15, ymm12, 44H + vmovdqa ymmword ptr [rsp], ymm14 + vmovdqa ymmword ptr [rsp+20H], ymm15 + mov r8, qword ptr 
[rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop4: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+200H], eax + vmovups ymm2, ymmword ptr [r8+rdx-40H] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-40H], 01H + vmovups ymm3, ymmword ptr [r8+rdx-30H] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-30H], 01H + vshufps ymm4, ymm2, ymm3, 136 + vshufps ymm5, ymm2, ymm3, 221 + vmovups ymm2, ymmword ptr [r8+rdx-20H] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-20H], 01H + vmovups ymm3, ymmword ptr [r8+rdx-10H] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-10H], 01H + vshufps ymm6, ymm2, ymm3, 136 + vshufps ymm7, ymm2, ymm3, 221 + vpshufd ymm6, ymm6, 93H + vpshufd ymm7, ymm7, 93H + vmovups ymm10, ymmword ptr [r10+rdx-40H] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-40H], 01H + vmovups ymm11, ymmword ptr [r10+rdx-30H] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-30H], 01H + vshufps ymm12, ymm10, ymm11, 136 + vshufps ymm13, ymm10, ymm11, 221 + vmovups ymm10, ymmword ptr [r10+rdx-20H] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-20H], 01H + vmovups ymm11, ymmword ptr [r10+rdx-10H] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-10H], 01H + vshufps ymm14, ymm10, ymm11, 136 + vshufps ymm15, ymm10, ymm11, 221 + vpshufd ymm14, ymm14, 93H + vpshufd ymm15, ymm15, 93H + vpbroadcastd ymm2, dword ptr [rsp+200H] + vmovdqa ymm3, ymmword ptr [rsp] + vmovdqa ymm11, ymmword ptr [rsp+20H] + vpblendd ymm3, ymm3, ymm2, 88H + vpblendd ymm11, ymm11, ymm2, 88H + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] + vmovdqa ymm10, ymm2 + mov al, 7 +roundloop4: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm8, ymm8, ymm12 + vmovdqa ymmword ptr [rsp+40H], ymm4 + nop + vmovdqa ymmword ptr [rsp+60H], ymm12 + nop + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + 
vbroadcasti128 ymm4, xmmword ptr [ROT16] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vmovdqa ymmword ptr [rsp+80H], ymm5 + vmovdqa ymmword ptr [rsp+0A0H], ymm13 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 93H + vpshufd ymm8, ymm8, 93H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm11, ymm11, 4EH + vpshufd ymm2, ymm2, 39H + vpshufd ymm10, ymm10, 39H + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm8, ymm8, ymm14 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm8, ymm8, ymm15 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 
25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 39H + vpshufd ymm8, ymm8, 39H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm11, ymm11, 4EH + vpshufd ymm2, ymm2, 93H + vpshufd ymm10, ymm10, 93H + dec al + je endroundloop4 + vmovdqa ymm4, ymmword ptr [rsp+40H] + vmovdqa ymm5, ymmword ptr [rsp+80H] + vshufps ymm12, ymm4, ymm5, 214 + vpshufd ymm13, ymm4, 0FH + vpshufd ymm4, ymm12, 39H + vshufps ymm12, ymm6, ymm7, 250 + vpblendd ymm13, ymm13, ymm12, 0AAH + vpunpcklqdq ymm12, ymm7, ymm5 + vpblendd ymm12, ymm12, ymm6, 88H + vpshufd ymm12, ymm12, 78H + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 1EH + vmovdqa ymmword ptr [rsp+40H], ymm13 + vmovdqa ymmword ptr [rsp+80H], ymm12 + vmovdqa ymm12, ymmword ptr [rsp+60H] + vmovdqa ymm13, ymmword ptr [rsp+0A0H] + vshufps ymm5, ymm12, ymm13, 214 + vpshufd ymm6, ymm12, 0FH + vpshufd ymm12, ymm5, 39H + vshufps ymm5, ymm14, ymm15, 250 + vpblendd ymm6, ymm6, ymm5, 0AAH + vpunpcklqdq ymm5, ymm15, ymm13 + vpblendd ymm5, ymm5, ymm14, 88H + vpshufd ymm5, ymm5, 78H + vpunpckhdq ymm13, ymm13, ymm15 + vpunpckldq ymm14, ymm14, ymm13 + vpshufd ymm15, ymm14, 1EH + vmovdqa ymm13, ymm6 + vmovdqa ymm14, ymm5 + vmovdqa ymm5, ymmword ptr [rsp+40H] + vmovdqa ymm6, ymmword ptr [rsp+80H] + jmp roundloop4 +endroundloop4: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + vpxor ymm8, ymm8, ymm10 + vpxor ymm9, ymm9, ymm11 + mov eax, r13d + cmp rdx, r15 + jne innerloop4 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vmovdqu xmmword ptr [rbx+40H], xmm8 + vmovdqu xmmword ptr [rbx+50H], xmm9 + vextracti128 xmmword ptr [rbx+60H], ymm8, 01H + vextracti128 xmmword ptr [rbx+70H], ymm9, 01H + vmovaps xmm8, xmmword ptr [rsp+260H] + vmovaps xmm0, xmmword ptr [rsp+220H] + vmovaps xmm1, xmmword ptr [rsp+230H] + vmovaps xmm2, xmmword ptr [rsp+240H] 
+ vmovaps xmm3, xmmword ptr [rsp+250H] + vblendvps xmm0, xmm0, xmm1, xmm8 + vblendvps xmm2, xmm2, xmm3, xmm8 + vmovaps xmmword ptr [rsp+220H], xmm0 + vmovaps xmmword ptr [rsp+240H], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +final3blocks: + test rsi, 2H + je final1blocks + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+10H] + vmovd xmm13, dword ptr [rsp+220H] + vpinsrd xmm13, xmm13, dword ptr [rsp+240H], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovd xmm14, dword ptr [rsp+224H] + vpinsrd xmm14, xmm14, dword ptr [rsp+244H], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + vinserti128 ymm13, ymm13, xmm14, 01H + vbroadcasti128 ymm14, xmmword ptr [ROT16] + vbroadcasti128 ymm15, xmmword ptr [ROT8] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+200H], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] + vpbroadcastd ymm8, dword ptr [rsp+200H] + vpblendd ymm3, ymm13, ymm8, 88H + vmovups ymm8, ymmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 93H + vpshufd ymm7, ymm7, 93H + mov al, 7 +roundloop2: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm5 + vpaddd 
ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 93H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 39H + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 39H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 93H + dec al + jz endroundloop2 + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0FH + vpshufd ymm4, ymm8, 39H + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0AAH + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 88H + vpshufd ymm8, ymm8, 78H + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 1EH + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp roundloop2 +endroundloop2: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vmovaps ymm8, ymmword ptr [rsp+260H] + vmovaps ymm0, ymmword ptr [rsp+220H] + vmovups ymm1, ymmword ptr [rsp+228H] + vmovaps ymm2, ymmword ptr [rsp+240H] + vmovups ymm3, ymmword ptr [rsp+248H] + vblendvps ymm0, ymm0, ymm1, ymm8 + vblendvps ymm2, ymm2, ymm3, ymm8 + vmovaps ymmword ptr [rsp+220H], ymm0 + vmovaps ymmword ptr [rsp+240H], ymm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +final1blocks: + test rsi, 1H + je unwind + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + 
vmovd xmm3, dword ptr [rsp+220H] + vpinsrd xmm3, xmm3, dword ptr [rsp+240H], 1 + vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovdqa xmm14, xmmword ptr [ROT16] + vmovdqa xmm15, xmmword ptr [ROT8] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vmovdqa xmm2, xmmword ptr [BLAKE3_IV] + vmovdqa xmm3, xmm13 + vpinsrd xmm3, xmm3, eax, 3 + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vmovups xmm9, xmmword ptr [r8+rdx-30H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vmovups xmm9, xmmword ptr [r8+rdx-10H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +roundloop1: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, 
xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + jmp unwind + +_blake3_hash_many_avx2 ENDP +blake3_hash_many_avx2 ENDP +_TEXT ENDS + +_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' +ALIGN 64 +ADD0: + dd 0, 1, 2, 3, 4, 5, 6, 7 + +ADD1: + dd 8 dup (8) + +BLAKE3_IV_0: + dd 8 dup (6A09E667H) + +BLAKE3_IV_1: + dd 8 dup (0BB67AE85H) + +BLAKE3_IV_2: + dd 8 dup (3C6EF372H) + +BLAKE3_IV_3: + dd 8 dup (0A54FF53AH) + +BLAKE3_BLOCK_LEN: + dd 8 dup (64) + +ROT16: + db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 + +ROT8: + db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 + +CMP_MSB_MASK: + dd 8 dup(80000000H) + +BLAKE3_IV: + dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH + +_RDATA ENDS +END diff --git a/src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm new file mode 100644 index 0000000..97a7268 --- /dev/null +++ b/src/third_party/blake3/blake3_avx512_x86-64_windows_msvc.asm @@ -0,0 +1,2634 @@ +public _blake3_hash_many_avx512 +public blake3_hash_many_avx512 +public blake3_compress_in_place_avx512 +public _blake3_compress_in_place_avx512 +public blake3_compress_xof_avx512 +public _blake3_compress_xof_avx512 + +_TEXT SEGMENT ALIGN(16) 'CODE' + +ALIGN 16 +blake3_hash_many_avx512 PROC +_blake3_hash_many_avx512 PROC + push r15 + push r14 + push r13 + push r12 + push rdi + push rsi + push rbx + push rbp + mov rbp, rsp + sub rsp, 304 + and rsp, 0FFFFFFFFFFFFFFC0H + vmovdqa xmmword ptr [rsp+90H], xmm6 + vmovdqa xmmword ptr 
[rsp+0A0H], xmm7 + vmovdqa xmmword ptr [rsp+0B0H], xmm8 + vmovdqa xmmword ptr [rsp+0C0H], xmm9 + vmovdqa xmmword ptr [rsp+0D0H], xmm10 + vmovdqa xmmword ptr [rsp+0E0H], xmm11 + vmovdqa xmmword ptr [rsp+0F0H], xmm12 + vmovdqa xmmword ptr [rsp+100H], xmm13 + vmovdqa xmmword ptr [rsp+110H], xmm14 + vmovdqa xmmword ptr [rsp+120H], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+68H] + movzx r9, byte ptr [rbp+70H] + neg r9 + kmovw k1, r9d + vmovd xmm0, r8d + vpbroadcastd ymm0, xmm0 + shr r8, 32 + vmovd xmm1, r8d + vpbroadcastd ymm1, xmm1 + vmovdqa ymm4, ymm1 + vmovdqa ymm5, ymm1 + vpaddd ymm2, ymm0, ymmword ptr [ADD0] + vpaddd ymm3, ymm0, ymmword ptr [ADD0+32] + vpcmpud k2, ymm2, ymm0, 1 + vpcmpud k3, ymm3, ymm0, 1 + ; XXX: ml64.exe does not currently understand the syntax. We use a workaround. + vpbroadcastd ymm6, dword ptr [ADD1] + vpaddd ymm4 {k2}, ymm4, ymm6 + vpaddd ymm5 {k3}, ymm5, ymm6 + ; vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1] {1to8} + ; vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1] {1to8} + knotw k2, k1 + vmovdqa32 ymm2 {k2}, ymm0 + vmovdqa32 ymm3 {k2}, ymm0 + vmovdqa32 ymm4 {k2}, ymm1 + vmovdqa32 ymm5 {k2}, ymm1 + vmovdqa ymmword ptr [rsp], ymm2 + vmovdqa ymmword ptr [rsp+20H], ymm3 + vmovdqa ymmword ptr [rsp+40H], ymm4 + vmovdqa ymmword ptr [rsp+60H], ymm5 + shl rdx, 6 + mov qword ptr [rsp+80H], rdx + cmp rsi, 16 + jc final15blocks +outerloop16: + vpbroadcastd zmm0, dword ptr [rcx] + vpbroadcastd zmm1, dword ptr [rcx+1H*4H] + vpbroadcastd zmm2, dword ptr [rcx+2H*4H] + vpbroadcastd zmm3, dword ptr [rcx+3H*4H] + vpbroadcastd zmm4, dword ptr [rcx+4H*4H] + vpbroadcastd zmm5, dword ptr [rcx+5H*4H] + vpbroadcastd zmm6, dword ptr [rcx+6H*4H] + vpbroadcastd zmm7, dword ptr [rcx+7H*4H] + movzx eax, byte ptr [rbp+78H] + movzx ebx, byte ptr [rbp+80H] + or eax, ebx + xor edx, edx +ALIGN 16 +innerloop16: + movzx ebx, byte ptr [rbp+88H] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+80H] + cmove eax, ebx + mov dword ptr 
[rsp+88H], eax + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+40H] + mov r13, qword ptr [rdi+48H] + mov r14, qword ptr [rdi+50H] + mov r15, qword ptr [rdi+58H] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H + vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H + vpunpcklqdq zmm8, zmm16, zmm17 + vpunpckhqdq zmm9, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H + vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H + vpunpcklqdq zmm10, zmm18, zmm19 + vpunpckhqdq zmm11, zmm18, zmm19 + mov r8, qword ptr [rdi+20H] + mov r9, qword ptr [rdi+28H] + mov r10, qword ptr [rdi+30H] + mov r11, qword ptr [rdi+38H] + mov r12, qword ptr [rdi+60H] + mov r13, qword ptr [rdi+68H] + mov r14, qword ptr [rdi+70H] + mov r15, qword ptr [rdi+78H] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H + vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H + vpunpcklqdq zmm12, zmm16, zmm17 + vpunpckhqdq zmm13, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H + vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H + vpunpcklqdq zmm14, zmm18, zmm19 + vpunpckhqdq zmm15, zmm18, zmm19 + vmovdqa32 zmm27, zmmword ptr [INDEX0] + vmovdqa32 zmm31, zmmword ptr [INDEX1] + vshufps zmm16, zmm8, zmm10, 136 + vshufps zmm17, zmm12, zmm14, 136 + vmovdqa32 zmm20, zmm16 + vpermt2d zmm16, zmm27, zmm17 + vpermt2d zmm20, zmm31, zmm17 + vshufps zmm17, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm21, zmm17 + vpermt2d 
zmm17, zmm27, zmm30 + vpermt2d zmm21, zmm31, zmm30 + vshufps zmm18, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm22, zmm18 + vpermt2d zmm18, zmm27, zmm8 + vpermt2d zmm22, zmm31, zmm8 + vshufps zmm19, zmm9, zmm11, 221 + vshufps zmm8, zmm13, zmm15, 221 + vmovdqa32 zmm23, zmm19 + vpermt2d zmm19, zmm27, zmm8 + vpermt2d zmm23, zmm31, zmm8 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+40H] + mov r13, qword ptr [rdi+48H] + mov r14, qword ptr [rdi+50H] + mov r15, qword ptr [rdi+58H] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H + vpunpcklqdq zmm8, zmm24, zmm25 + vpunpckhqdq zmm9, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H + vpunpcklqdq zmm10, zmm24, zmm25 + vpunpckhqdq zmm11, zmm24, zmm25 + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r12+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r13+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r14+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + prefetcht0 byte ptr [r15+rdx+80H] + mov r8, qword ptr [rdi+20H] + mov r9, qword ptr [rdi+28H] + mov r10, qword ptr [rdi+30H] + mov r11, qword ptr [rdi+38H] + mov r12, qword ptr [rdi+60H] + mov r13, qword ptr [rdi+68H] + mov r14, qword ptr [rdi+70H] + mov r15, qword ptr [rdi+78H] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H + vpunpcklqdq zmm12, zmm24, zmm25 + vpunpckhqdq zmm13, zmm24, zmm25 + 
vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H + vpunpcklqdq zmm14, zmm24, zmm25 + vpunpckhqdq zmm15, zmm24, zmm25 + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r12+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r13+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r14+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + prefetcht0 byte ptr [r15+rdx+80H] + vshufps zmm24, zmm8, zmm10, 136 + vshufps zmm30, zmm12, zmm14, 136 + vmovdqa32 zmm28, zmm24 + vpermt2d zmm24, zmm27, zmm30 + vpermt2d zmm28, zmm31, zmm30 + vshufps zmm25, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm29, zmm25 + vpermt2d zmm25, zmm27, zmm30 + vpermt2d zmm29, zmm31, zmm30 + vshufps zmm26, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm30, zmm26 + vpermt2d zmm26, zmm27, zmm8 + vpermt2d zmm30, zmm31, zmm8 + vshufps zmm8, zmm9, zmm11, 221 + vshufps zmm10, zmm13, zmm15, 221 + vpermi2d zmm27, zmm8, zmm10 + vpermi2d zmm31, zmm8, zmm10 + vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0] + vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1] + vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2] + vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3] + vmovdqa32 zmm12, zmmword ptr [rsp] + vmovdqa32 zmm13, zmmword ptr [rsp+1H*40H] + vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN] + vpbroadcastd zmm15, dword ptr [rsp+22H*4H] + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, 
zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm24 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord 
zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm23 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm17 + vpaddd 
zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, 
zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm27 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm21 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord 
zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, 
zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm28 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 
+ vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm26 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + 
vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + 
vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm22 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, 
zmm7, 12 + vpaddd zmm0, zmm0, zmm31 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + 
vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpxord zmm0, zmm0, zmm8 + vpxord zmm1, zmm1, zmm9 + vpxord zmm2, zmm2, zmm10 + vpxord zmm3, zmm3, zmm11 + vpxord zmm4, zmm4, zmm12 + vpxord zmm5, zmm5, zmm13 + vpxord zmm6, zmm6, zmm14 + vpxord zmm7, zmm7, zmm15 + movzx eax, byte ptr [rbp+78H] + jne innerloop16 + mov rbx, qword ptr [rbp+90H] + vpunpckldq zmm16, zmm0, zmm1 + vpunpckhdq zmm17, zmm0, zmm1 + vpunpckldq zmm18, zmm2, zmm3 + vpunpckhdq zmm19, zmm2, zmm3 + vpunpckldq zmm20, zmm4, zmm5 + vpunpckhdq zmm21, zmm4, zmm5 + vpunpckldq zmm22, zmm6, zmm7 + vpunpckhdq zmm23, zmm6, zmm7 + vpunpcklqdq zmm0, zmm16, zmm18 + vpunpckhqdq zmm1, zmm16, zmm18 + vpunpcklqdq zmm2, zmm17, zmm19 + vpunpckhqdq zmm3, zmm17, zmm19 + vpunpcklqdq zmm4, zmm20, zmm22 + vpunpckhqdq zmm5, zmm20, zmm22 + vpunpcklqdq zmm6, zmm21, zmm23 + vpunpckhqdq zmm7, zmm21, zmm23 + vshufi32x4 zmm16, zmm0, zmm4, 88H + vshufi32x4 zmm17, zmm1, zmm5, 88H + vshufi32x4 zmm18, zmm2, zmm6, 88H + vshufi32x4 zmm19, zmm3, zmm7, 88H + vshufi32x4 zmm20, zmm0, zmm4, 0DDH + vshufi32x4 zmm21, zmm1, zmm5, 0DDH + vshufi32x4 zmm22, zmm2, zmm6, 0DDH + vshufi32x4 zmm23, zmm3, zmm7, 0DDH + vshufi32x4 zmm0, zmm16, zmm17, 88H + vshufi32x4 zmm1, zmm18, zmm19, 88H + vshufi32x4 zmm2, zmm20, zmm21, 88H + vshufi32x4 zmm3, zmm22, zmm23, 88H + vshufi32x4 zmm4, zmm16, zmm17, 0DDH + vshufi32x4 zmm5, zmm18, zmm19, 0DDH + vshufi32x4 zmm6, zmm20, zmm21, 0DDH + vshufi32x4 zmm7, zmm22, zmm23, 0DDH + vmovdqu32 zmmword ptr [rbx], zmm0 + vmovdqu32 zmmword ptr [rbx+1H*40H], zmm1 + vmovdqu32 zmmword ptr [rbx+2H*40H], zmm2 + vmovdqu32 zmmword ptr [rbx+3H*40H], zmm3 + vmovdqu32 zmmword ptr [rbx+4H*40H], zmm4 + vmovdqu32 zmmword ptr [rbx+5H*40H], zmm5 + vmovdqu32 zmmword ptr [rbx+6H*40H], zmm6 + vmovdqu32 zmmword ptr [rbx+7H*40H], zmm7 + vmovdqa32 zmm0, zmmword ptr [rsp] + 
vmovdqa32 zmm1, zmmword ptr [rsp+1H*40H] + vmovdqa32 zmm2, zmm0 + ; XXX: ml64.exe does not currently understand the syntax. We use a workaround. + vpbroadcastd zmm4, dword ptr [ADD16] + vpbroadcastd zmm5, dword ptr [ADD1] + vpaddd zmm2{k1}, zmm0, zmm4 + ; vpaddd zmm2{k1}, zmm0, dword ptr [ADD16] ; {1to16} + vpcmpud k2, zmm2, zmm0, 1 + vpaddd zmm1 {k2}, zmm1, zmm5 + ; vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1] ; {1to16} + vmovdqa32 zmmword ptr [rsp], zmm2 + vmovdqa32 zmmword ptr [rsp+1H*40H], zmm1 + add rdi, 128 + add rbx, 512 + mov qword ptr [rbp+90H], rbx + sub rsi, 16 + cmp rsi, 16 + jnc outerloop16 + test rsi, rsi + jne final15blocks +unwind: + vzeroupper + vmovdqa xmm6, xmmword ptr [rsp+90H] + vmovdqa xmm7, xmmword ptr [rsp+0A0H] + vmovdqa xmm8, xmmword ptr [rsp+0B0H] + vmovdqa xmm9, xmmword ptr [rsp+0C0H] + vmovdqa xmm10, xmmword ptr [rsp+0D0H] + vmovdqa xmm11, xmmword ptr [rsp+0E0H] + vmovdqa xmm12, xmmword ptr [rsp+0F0H] + vmovdqa xmm13, xmmword ptr [rsp+100H] + vmovdqa xmm14, xmmword ptr [rsp+110H] + vmovdqa xmm15, xmmword ptr [rsp+120H] + mov rsp, rbp + pop rbp + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final15blocks: + test esi, 8H + je final7blocks + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+4H] + vpbroadcastd ymm2, dword ptr [rcx+8H] + vpbroadcastd ymm3, dword ptr [rcx+0CH] + vpbroadcastd ymm4, dword ptr [rcx+10H] + vpbroadcastd ymm5, dword ptr [rcx+14H] + vpbroadcastd ymm6, dword ptr [rcx+18H] + vpbroadcastd ymm7, dword ptr [rcx+1CH] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+20H] + mov r13, qword ptr [rdi+28H] + mov r14, qword ptr [rdi+30H] + mov r15, qword ptr [rdi+38H] + movzx eax, byte ptr [rbp+78H] + movzx ebx, byte ptr [rbp+80H] + or eax, ebx + xor edx, edx +innerloop8: + movzx ebx, byte ptr [rbp+88H] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+80H] + cmove eax, ebx + 
mov dword ptr [rsp+88H], eax + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-40H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-40H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-40H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm16, ymm12, ymm14, 136 + vshufps ymm17, ymm12, ymm14, 221 + vshufps ymm18, ymm13, ymm15, 136 + vshufps ymm19, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-30H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-30H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-30H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm20, ymm12, ymm14, 136 + vshufps ymm21, ymm12, ymm14, 221 + vshufps ymm22, ymm13, ymm15, 136 + vshufps ymm23, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-20H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-20H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-20H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm24, ymm12, ymm14, 136 + vshufps ymm25, ymm12, ymm14, 221 + vshufps ymm26, ymm13, ymm15, 136 + vshufps 
ymm27, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-10H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-10H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-10H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm28, ymm12, ymm14, 136 + vshufps ymm29, ymm12, ymm14, 221 + vshufps ymm30, ymm13, ymm15, 136 + vshufps ymm31, ymm13, ymm15, 221 + vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0] + vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1] + vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2] + vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3] + vmovdqa ymm12, ymmword ptr [rsp] + vmovdqa ymm13, ymmword ptr [rsp+40H] + vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN] + vpbroadcastd ymm15, dword ptr [rsp+88H] + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + 
vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm24 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + 
vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm23 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd 
ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + 
vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm27 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm21 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm30 + 
vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord 
ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm28 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm26 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord 
ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd 
ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 
12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm22 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm31 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, 
ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+78H] + jne innerloop8 + mov 
rbx, qword ptr [rbp+90H] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0CCH + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0CCH + vblendps ymm3, ymm12, ymm9, 0CCH + vperm2f128 ymm12, ymm1, ymm2, 20H + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0CCH + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 20H + vmovups ymmword ptr [rbx+20H], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0CCH + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0CCH + vblendps ymm14, ymm14, ymm13, 0CCH + vperm2f128 ymm8, ymm10, ymm14, 20H + vmovups ymmword ptr [rbx+40H], ymm8 + vblendps ymm15, ymm13, ymm15, 0CCH + vperm2f128 ymm13, ymm6, ymm15, 20H + vmovups ymmword ptr [rbx+60H], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 31H + vperm2f128 ymm11, ymm3, ymm4, 31H + vmovups ymmword ptr [rbx+80H], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 31H + vperm2f128 ymm15, ymm6, ymm15, 31H + vmovups ymmword ptr [rbx+0A0H], ymm11 + vmovups ymmword ptr [rbx+0C0H], ymm14 + vmovups ymmword ptr [rbx+0E0H], ymm15 + vmovdqa ymm0, ymmword ptr [rsp] + vmovdqa ymm2, ymmword ptr [rsp+40H] + vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+1H*20H] + vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+3H*20H] + vmovdqa ymmword ptr [rsp], ymm0 + vmovdqa ymmword ptr [rsp+40H], ymm2 + add rbx, 256 + mov qword ptr [rbp+90H], rbx + add rdi, 64 + sub rsi, 8 +final7blocks: + mov rbx, qword ptr [rbp+90H] + mov r15, qword ptr [rsp+80H] + movzx r13, byte ptr [rbp+78H] + movzx r12, byte ptr [rbp+88H] + test esi, 4H + je final3blocks + vbroadcasti32x4 zmm0, xmmword ptr [rcx] + vbroadcasti32x4 zmm1, xmmword ptr [rcx+1H*10H] + vmovdqa xmm12, xmmword ptr [rsp] + vmovdqa xmm13, xmmword ptr [rsp+40H] + vpunpckldq xmm14, xmm12, xmm13 + vpunpckhdq xmm15, xmm12, xmm13 + 
vpermq ymm14, ymm14, 0DCH + vpermq ymm15, ymm15, 0DCH + vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN] + vinserti64x4 zmm13, zmm14, ymm15, 01H + mov eax, 17476 + kmovw k2, eax + vpblendmd zmm13 {k2}, zmm13, zmm12 + vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov eax, 43690 + kmovw k3, eax + mov eax, 34952 + kmovw k4, eax + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop4: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+88H], eax + vmovdqa32 zmm2, zmm15 + vpbroadcastd zmm8, dword ptr [rsp+22H*4H] + vpblendmd zmm3 {k4}, zmm13, zmm8 + vmovups zmm8, zmmword ptr [r8+rdx-1H*40H] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-4H*10H], 01H + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-4H*10H], 02H + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-4H*10H], 03H + vmovups zmm9, zmmword ptr [r8+rdx-30H] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-3H*10H], 01H + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-3H*10H], 02H + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-3H*10H], 03H + vshufps zmm4, zmm8, zmm9, 136 + vshufps zmm5, zmm8, zmm9, 221 + vmovups zmm8, zmmword ptr [r8+rdx-20H] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-2H*10H], 01H + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-2H*10H], 02H + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-2H*10H], 03H + vmovups zmm9, zmmword ptr [r8+rdx-10H] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-1H*10H], 01H + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-1H*10H], 02H + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-1H*10H], 03H + vshufps zmm6, zmm8, zmm9, 136 + vshufps zmm7, zmm8, zmm9, 221 + vpshufd zmm6, zmm6, 93H + vpshufd zmm7, zmm7, 93H + mov al, 7 +roundloop4: + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 
+ vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 93H + vpshufd zmm3, zmm3, 4EH + vpshufd zmm2, zmm2, 39H + vpaddd zmm0, zmm0, zmm6 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm7 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 39H + vpshufd zmm3, zmm3, 4EH + vpshufd zmm2, zmm2, 93H + dec al + jz endroundloop4 + vshufps zmm8, zmm4, zmm5, 214 + vpshufd zmm9, zmm4, 0FH + vpshufd zmm4, zmm8, 39H + vshufps zmm8, zmm6, zmm7, 250 + vpblendmd zmm9 {k3}, zmm9, zmm8 + vpunpcklqdq zmm8, zmm7, zmm5 + vpblendmd zmm8 {k4}, zmm8, zmm6 + vpshufd zmm8, zmm8, 78H + vpunpckhdq zmm5, zmm5, zmm7 + vpunpckldq zmm6, zmm6, zmm5 + vpshufd zmm7, zmm6, 1EH + vmovdqa32 zmm5, zmm9 + vmovdqa32 zmm6, zmm8 + jmp roundloop4 +endroundloop4: + vpxord zmm0, zmm0, zmm2 + vpxord zmm1, zmm1, zmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop4 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vextracti32x4 xmmword ptr [rbx+4H*10H], zmm0, 02H + vextracti32x4 xmmword ptr [rbx+5H*10H], zmm1, 02H + vextracti32x4 xmmword ptr [rbx+6H*10H], zmm0, 03H + vextracti32x4 xmmword ptr [rbx+7H*10H], zmm1, 03H + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+40H] + vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+1H*10H] + vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+5H*10H] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+40H], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +final3blocks: + test esi, 2H + je final1block + vbroadcasti128 ymm0, xmmword ptr [rcx] + 
vbroadcasti128 ymm1, xmmword ptr [rcx+10H] + vmovd xmm13, dword ptr [rsp] + vpinsrd xmm13, xmm13, dword ptr [rsp+40H], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovd xmm14, dword ptr [rsp+4H] + vpinsrd xmm14, xmm14, dword ptr [rsp+44H], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + vinserti128 ymm13, ymm13, xmm14, 01H + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+88H], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] + vpbroadcastd ymm8, dword ptr [rsp+88H] + vpblendd ymm3, ymm13, ymm8, 88H + vmovups ymm8, ymmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 93H + vpshufd ymm7, ymm7, 93H + mov al, 7 +roundloop2: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 93H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 39H + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, 
ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 39H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 93H + dec al + jz endroundloop2 + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0FH + vpshufd ymm4, ymm8, 39H + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0AAH + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 88H + vpshufd ymm8, ymm8, 78H + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 1EH + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp roundloop2 +endroundloop2: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+40H] + vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+8H] + vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+48H] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+40H], xmm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +final1block: + test esi, 1H + je unwind + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + vmovd xmm14, dword ptr [rsp] + vpinsrd xmm14, xmm14, dword ptr [rsp+40H], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovdqa xmm15, xmmword ptr [BLAKE3_IV] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vpinsrd xmm3, xmm14, eax, 3 + vmovdqa xmm2, xmm15 + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vmovups xmm9, xmmword ptr [r8+rdx-30H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vmovups xmm9, xmmword ptr [r8+rdx-10H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 
93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +roundloop1: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + jmp unwind + +_blake3_hash_many_avx512 ENDP +blake3_hash_many_avx512 ENDP + +ALIGN 16 +blake3_compress_in_place_avx512 PROC +_blake3_compress_in_place_avx512 PROC + sub rsp, 72 + vmovdqa xmmword ptr [rsp], xmm6 + vmovdqa xmmword ptr [rsp+10H], xmm7 + vmovdqa xmmword ptr [rsp+20H], xmm8 + vmovdqa xmmword ptr [rsp+30H], xmm9 + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + movzx eax, byte ptr [rsp+70H] + movzx r8d, r8b + shl rax, 32 + add r8, rax + 
vmovd xmm3, r9 + vmovd xmm4, r8 + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV] + vmovups xmm8, xmmword ptr [rdx] + vmovups xmm9, xmmword ptr [rdx+10H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rdx+20H] + vmovups xmm9, xmmword ptr [rdx+30H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +@@: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz @F + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp @B +@@: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vmovdqu xmmword ptr [rcx], xmm0 + vmovdqu xmmword ptr [rcx+10H], xmm1 + vmovdqa xmm6, xmmword ptr [rsp] + vmovdqa xmm7, xmmword ptr [rsp+10H] + vmovdqa xmm8, xmmword ptr [rsp+20H] + vmovdqa xmm9, xmmword ptr [rsp+30H] + add rsp, 72 + 
ret +_blake3_compress_in_place_avx512 ENDP +blake3_compress_in_place_avx512 ENDP + +ALIGN 16 +blake3_compress_xof_avx512 PROC +_blake3_compress_xof_avx512 PROC + sub rsp, 72 + vmovdqa xmmword ptr [rsp], xmm6 + vmovdqa xmmword ptr [rsp+10H], xmm7 + vmovdqa xmmword ptr [rsp+20H], xmm8 + vmovdqa xmmword ptr [rsp+30H], xmm9 + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + movzx eax, byte ptr [rsp+70H] + movzx r8d, r8b + mov r10, qword ptr [rsp+78H] + shl rax, 32 + add r8, rax + vmovd xmm3, r9 + vmovd xmm4, r8 + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV] + vmovups xmm8, xmmword ptr [rdx] + vmovups xmm9, xmmword ptr [rdx+10H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rdx+20H] + vmovups xmm9, xmmword ptr [rdx+30H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +@@: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz @F + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq 
xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp @B +@@: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vpxor xmm2, xmm2, xmmword ptr [rcx] + vpxor xmm3, xmm3, xmmword ptr [rcx+10H] + vmovdqu xmmword ptr [r10], xmm0 + vmovdqu xmmword ptr [r10+10H], xmm1 + vmovdqu xmmword ptr [r10+20H], xmm2 + vmovdqu xmmword ptr [r10+30H], xmm3 + vmovdqa xmm6, xmmword ptr [rsp] + vmovdqa xmm7, xmmword ptr [rsp+10H] + vmovdqa xmm8, xmmword ptr [rsp+20H] + vmovdqa xmm9, xmmword ptr [rsp+30H] + add rsp, 72 + ret +_blake3_compress_xof_avx512 ENDP +blake3_compress_xof_avx512 ENDP + +_TEXT ENDS + +_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' +ALIGN 64 +INDEX0: + dd 0, 1, 2, 3, 16, 17, 18, 19 + dd 8, 9, 10, 11, 24, 25, 26, 27 +INDEX1: + dd 4, 5, 6, 7, 20, 21, 22, 23 + dd 12, 13, 14, 15, 28, 29, 30, 31 +ADD0: + dd 0, 1, 2, 3, 4, 5, 6, 7 + dd 8, 9, 10, 11, 12, 13, 14, 15 +ADD1: + dd 1 +ADD16: + dd 16 +BLAKE3_BLOCK_LEN: + dd 64 +ALIGN 64 +BLAKE3_IV: +BLAKE3_IV_0: + dd 06A09E667H +BLAKE3_IV_1: + dd 0BB67AE85H +BLAKE3_IV_2: + dd 03C6EF372H +BLAKE3_IV_3: + dd 0A54FF53AH + +_RDATA ENDS +END diff --git a/src/third_party/blake3/blake3_dispatch.c b/src/third_party/blake3/blake3_dispatch.c index a4c0fa9..6518478 100644 --- a/src/third_party/blake3/blake3_dispatch.c +++ b/src/third_party/blake3/blake3_dispatch.c @@ -14,6 +14,8 @@ #endif #endif +#define MAYBE_UNUSED(x) (void)((x)) + #if defined(IS_X86) static uint64_t xgetbv() { #if defined(_MSC_VER) @@ -137,6 +139,7 @@ void blake3_compress_in_place(uint32_t cv[8], uint8_t flags) { #if defined(IS_X86) const enum cpu_feature features = get_cpu_features(); + MAYBE_UNUSED(features); #if !defined(BLAKE3_NO_AVX512) if (features & AVX512VL) { blake3_compress_in_place_avx512(cv, block, block_len, counter, flags); @@ -165,6 +168,7 @@ void blake3_compress_xof(const uint32_t cv[8], uint8_t 
out[64]) { #if defined(IS_X86) const enum cpu_feature features = get_cpu_features(); + MAYBE_UNUSED(features); #if !defined(BLAKE3_NO_AVX512) if (features & AVX512VL) { blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out); @@ -193,6 +197,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, uint8_t flags_start, uint8_t flags_end, uint8_t *out) { #if defined(IS_X86) const enum cpu_feature features = get_cpu_features(); + MAYBE_UNUSED(features); #if !defined(BLAKE3_NO_AVX512) if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, @@ -242,6 +247,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, size_t blake3_simd_degree(void) { #if defined(IS_X86) const enum cpu_feature features = get_cpu_features(); + MAYBE_UNUSED(features); #if !defined(BLAKE3_NO_AVX512) if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { return 16; diff --git a/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm index 72deb7b..ff9bb4d 100644 --- a/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm +++ b/src/third_party/blake3/blake3_sse2_x86-64_windows_msvc.asm @@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC movzx r8d, r8b shl rax, 32 add r8, rax - movq xmm3, r9 - movq xmm4, r8 + movd xmm3, r9 + movd xmm4, r8 punpcklqdq xmm3, xmm4 movups xmm4, xmmword ptr [rdx] movups xmm5, xmmword ptr [rdx+10H] @@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC mov r10, qword ptr [rsp+0A8H] shl rax, 32 add r8, rax - movq xmm3, r9 - movq xmm4, r8 + movd xmm3, r9 + movd xmm4, r8 punpcklqdq xmm3, xmm4 movups xmm4, xmmword ptr [rdx] movups xmm5, xmmword ptr [rdx+10H] diff --git a/src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm b/src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm new file mode 100644 index 0000000..8966c7b --- /dev/null +++ 
b/src/third_party/blake3/blake3_sse41_x86-64_windows_msvc.asm @@ -0,0 +1,2089 @@ +public _blake3_hash_many_sse41 +public blake3_hash_many_sse41 +public blake3_compress_in_place_sse41 +public _blake3_compress_in_place_sse41 +public blake3_compress_xof_sse41 +public _blake3_compress_xof_sse41 + +_TEXT SEGMENT ALIGN(16) 'CODE' + +ALIGN 16 +blake3_hash_many_sse41 PROC +_blake3_hash_many_sse41 PROC + push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 528 + and rsp, 0FFFFFFFFFFFFFFC0H + movdqa xmmword ptr [rsp+170H], xmm6 + movdqa xmmword ptr [rsp+180H], xmm7 + movdqa xmmword ptr [rsp+190H], xmm8 + movdqa xmmword ptr [rsp+1A0H], xmm9 + movdqa xmmword ptr [rsp+1B0H], xmm10 + movdqa xmmword ptr [rsp+1C0H], xmm11 + movdqa xmmword ptr [rsp+1D0H], xmm12 + movdqa xmmword ptr [rsp+1E0H], xmm13 + movdqa xmmword ptr [rsp+1F0H], xmm14 + movdqa xmmword ptr [rsp+200H], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+68H] + movzx r9, byte ptr [rbp+70H] + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 00H + movdqa xmmword ptr [rsp+130H], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0] + pand xmm0, xmmword ptr [ADD1] + movdqa xmmword ptr [rsp+150H], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 00H + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+110H], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK] + pxor xmm1, xmmword ptr [CMP_MSB_MASK] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 00H + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+120H], xmm2 + mov rbx, qword ptr [rbp+90H] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+78H] + movzx r12d, byte ptr [rbp+88H] + cmp rsi, 4 + jc final3blocks +outerloop4: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 00H + pshufd xmm1, xmm3, 55H + pshufd xmm2, xmm3, 0AAH + pshufd xmm3, xmm3, 0FFH + movdqu xmm7, xmmword ptr [rcx+10H] + pshufd xmm4, xmm7, 00H + pshufd xmm5, xmm7, 55H + pshufd xmm6, xmm7, 0AAH + pshufd 
xmm7, xmm7, 0FFH + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop4: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-40H] + movdqu xmm9, xmmword ptr [r9+rdx-40H] + movdqu xmm10, xmmword ptr [r10+rdx-40H] + movdqu xmm11, xmmword ptr [r11+rdx-40H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+10H], xmm9 + movdqa xmmword ptr [rsp+20H], xmm12 + movdqa xmmword ptr [rsp+30H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-30H] + movdqu xmm9, xmmword ptr [r9+rdx-30H] + movdqu xmm10, xmmword ptr [r10+rdx-30H] + movdqu xmm11, xmmword ptr [r11+rdx-30H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+40H], xmm8 + movdqa xmmword ptr [rsp+50H], xmm9 + movdqa xmmword ptr [rsp+60H], xmm12 + movdqa xmmword ptr [rsp+70H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-20H] + movdqu xmm9, xmmword ptr [r9+rdx-20H] + movdqu xmm10, xmmword ptr [r10+rdx-20H] + movdqu xmm11, xmmword ptr [r11+rdx-20H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+80H], xmm8 + movdqa xmmword ptr [rsp+90H], 
xmm9 + movdqa xmmword ptr [rsp+0A0H], xmm12 + movdqa xmmword ptr [rsp+0B0H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-10H] + movdqu xmm9, xmmword ptr [r9+rdx-10H] + movdqu xmm10, xmmword ptr [r10+rdx-10H] + movdqu xmm11, xmmword ptr [r11+rdx-10H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0C0H], xmm8 + movdqa xmmword ptr [rsp+0D0H], xmm9 + movdqa xmmword ptr [rsp+0E0H], xmm12 + movdqa xmmword ptr [rsp+0F0H], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3] + movdqa xmm12, xmmword ptr [rsp+110H] + movdqa xmm13, xmmword ptr [rsp+120H] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN] + movd xmm15, eax + pshufd xmm15, xmm15, 00H + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+40H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, 
xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+10H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+50H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+80H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+0C0H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+90H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd 
xmm2, xmmword ptr [rsp+0D0H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+20H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+70H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+60H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + 
pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+10H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+90H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0B0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+0E0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb 
xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+30H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+0D0H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+40H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+20H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + 
paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+60H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+0B0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+50H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0F0H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr 
[rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0A0H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+0E0H] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+70H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+30H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 
25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+40H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+50H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+80H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + 
por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0C0H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+0F0H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0D0H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+0A0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+70H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, 
xmmword ptr [rsp+60H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+20H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+10H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+90H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+80H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + 
pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0E0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+0C0H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0D0H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+20H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb 
xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+30H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+60H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0B0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+10H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor 
xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0F0H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+90H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0E0H] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+30H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + 
pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0A0H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+40H] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne innerloop4 + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+20H], xmm1 + movdqu xmmword ptr [rbx+40H], xmm9 + movdqu xmmword ptr [rbx+60H], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq 
xmm7, xmm11 + movdqu xmmword ptr [rbx+10H], xmm4 + movdqu xmmword ptr [rbx+30H], xmm5 + movdqu xmmword ptr [rbx+50H], xmm9 + movdqu xmmword ptr [rbx+70H], xmm7 + movdqa xmm1, xmmword ptr [rsp+110H] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+150H] + movdqa xmmword ptr [rsp+110H], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK] + pxor xmm1, xmmword ptr [CMP_MSB_MASK] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+120H] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+120H], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc outerloop4 + test rsi, rsi + jne final3blocks +unwind: + movdqa xmm6, xmmword ptr [rsp+170H] + movdqa xmm7, xmmword ptr [rsp+180H] + movdqa xmm8, xmmword ptr [rsp+190H] + movdqa xmm9, xmmword ptr [rsp+1A0H] + movdqa xmm10, xmmword ptr [rsp+1B0H] + movdqa xmm11, xmmword ptr [rsp+1C0H] + movdqa xmm12, xmmword ptr [rsp+1D0H] + movdqa xmm13, xmmword ptr [rsp+1E0H] + movdqa xmm14, xmmword ptr [rsp+1F0H] + movdqa xmm15, xmmword ptr [rsp+200H] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final3blocks: + test esi, 2H + je final1block + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+110H] + pinsrd xmm13, dword ptr [rsp+120H], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+114H] + pinsrd xmm14, dword ptr [rsp+124H], 1 + pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + movaps xmmword ptr [rsp+10H], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-40H] + movups xmm5, xmmword ptr [r8+rdx-30H] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + 
movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-20H] + movups xmm7, xmmword ptr [r8+rdx-10H] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 93H + movups xmm12, xmmword ptr [r9+rdx-40H] + movups xmm13, xmmword ptr [r9+rdx-30H] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-20H] + movups xmm15, xmmword ptr [r9+rdx-10H] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 93H + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 93H + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+10H] + pinsrd xmm3, eax, 3 + pinsrd xmm11, eax, 3 + mov al, 7 +roundloop2: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+20H], xmm4 + movaps xmmword ptr [rsp+30H], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm12, xmmword ptr [ROT16] + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+40H], xmm5 + movaps xmmword ptr [rsp+50H], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm13, xmmword ptr [ROT8] + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 93H + pshufd xmm8, xmm8, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm11, xmm11, 4EH + pshufd xmm2, xmm2, 39H + pshufd xmm10, xmm10, 39H + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb 
xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 39H + pshufd xmm8, xmm8, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm11, xmm11, 4EH + pshufd xmm2, xmm2, 93H + pshufd xmm10, xmm10, 93H + dec al + je endroundloop2 + movdqa xmm12, xmmword ptr [rsp+20H] + movdqa xmm5, xmmword ptr [rsp+40H] + pshufd xmm13, xmm12, 0FH + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 39H + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pblendw xmm13, xmm12, 0CCH + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + pblendw xmm12, xmm6, 0C0H + pshufd xmm12, xmm12, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmmword ptr [rsp+20H], xmm13 + movdqa xmmword ptr [rsp+40H], xmm12 + movdqa xmm5, xmmword ptr [rsp+30H] + movdqa xmm13, xmmword ptr [rsp+50H] + pshufd xmm6, xmm5, 0FH + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 39H + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pblendw xmm6, xmm5, 0CCH + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + pblendw xmm5, xmm14, 0C0H + pshufd xmm5, xmm5, 78H + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 1EH + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+20H] + movdqa xmm6, xmmword ptr [rsp+40H] + jmp roundloop2 +endroundloop2: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + movups xmmword ptr [rbx], xmm0 + 
movups xmmword ptr [rbx+10H], xmm1 + movups xmmword ptr [rbx+20H], xmm8 + movups xmmword ptr [rbx+30H], xmm9 + movdqa xmm0, xmmword ptr [rsp+130H] + movdqa xmm1, xmmword ptr [rsp+110H] + movdqa xmm2, xmmword ptr [rsp+120H] + movdqu xmm3, xmmword ptr [rsp+118H] + movdqu xmm4, xmmword ptr [rsp+128H] + blendvps xmm1, xmm3, xmm0 + blendvps xmm2, xmm4, xmm0 + movdqa xmmword ptr [rsp+110H], xmm1 + movdqa xmmword ptr [rsp+120H], xmm2 + add rdi, 16 + add rbx, 64 + sub rsi, 2 +final1block: + test esi, 1H + je unwind + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movd xmm13, dword ptr [rsp+110H] + pinsrd xmm13, dword ptr [rsp+120H], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + movaps xmm14, xmmword ptr [ROT8] + movaps xmm15, xmmword ptr [ROT16] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV] + movaps xmm3, xmm13 + pinsrd xmm3, eax, 3 + movups xmm4, xmmword ptr [r8+rdx-40H] + movups xmm5, xmmword ptr [r8+rdx-30H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-20H] + movups xmm7, xmmword ptr [r8+rdx-10H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + mov al, 7 +roundloop1: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + 
movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0CCH + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0C0H + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+10H], xmm1 + jmp unwind +_blake3_hash_many_sse41 ENDP +blake3_hash_many_sse41 ENDP + +blake3_compress_in_place_sse41 PROC +_blake3_compress_in_place_sse41 PROC + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+10H], xmm7 + movdqa xmmword ptr [rsp+20H], xmm8 + movdqa xmmword ptr [rsp+30H], xmm9 + movdqa xmmword ptr [rsp+40H], xmm11 + movdqa xmmword ptr [rsp+50H], xmm14 + movdqa xmmword ptr [rsp+60H], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm2, xmmword ptr [BLAKE3_IV] + movzx eax, byte ptr [rsp+0A0H] + movzx r8d, r8b + shl rax, 32 + add r8, rax + movd xmm3, r9 + movd xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+10H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+20H] + movups xmm7, xmmword ptr [rdx+30H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + movaps xmm14, xmmword ptr [ROT8] + movaps xmm15, 
xmmword ptr [ROT16] + mov al, 7 +@@: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz @F + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0CCH + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0C0H + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp @B +@@: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rcx], xmm0 + movups xmmword ptr [rcx+10H], xmm1 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+10H] + movdqa xmm8, xmmword ptr [rsp+20H] + movdqa xmm9, xmmword ptr [rsp+30H] + movdqa xmm11, xmmword ptr [rsp+40H] + movdqa xmm14, xmmword ptr [rsp+50H] + movdqa xmm15, xmmword ptr [rsp+60H] + add rsp, 120 + ret +_blake3_compress_in_place_sse41 ENDP +blake3_compress_in_place_sse41 ENDP + +ALIGN 16 +blake3_compress_xof_sse41 PROC +_blake3_compress_xof_sse41 PROC + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+10H], xmm7 + movdqa xmmword ptr [rsp+20H], 
xmm8 + movdqa xmmword ptr [rsp+30H], xmm9 + movdqa xmmword ptr [rsp+40H], xmm11 + movdqa xmmword ptr [rsp+50H], xmm14 + movdqa xmmword ptr [rsp+60H], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm2, xmmword ptr [BLAKE3_IV] + movzx eax, byte ptr [rsp+0A0H] + movzx r8d, r8b + mov r10, qword ptr [rsp+0A8H] + shl rax, 32 + add r8, rax + movd xmm3, r9 + movd xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+10H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+20H] + movups xmm7, xmmword ptr [rdx+30H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + movaps xmm14, xmmword ptr [ROT8] + movaps xmm15, xmmword ptr [ROT16] + mov al, 7 +@@: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz @F + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0CCH + movdqa xmm8, xmm7 + punpcklqdq xmm8, 
xmm5 + pblendw xmm8, xmm6, 0C0H + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp @B +@@: + movdqu xmm4, xmmword ptr [rcx] + movdqu xmm5, xmmword ptr [rcx+10H] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r10], xmm0 + movups xmmword ptr [r10+10H], xmm1 + movups xmmword ptr [r10+20H], xmm2 + movups xmmword ptr [r10+30H], xmm3 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+10H] + movdqa xmm8, xmmword ptr [rsp+20H] + movdqa xmm9, xmmword ptr [rsp+30H] + movdqa xmm11, xmmword ptr [rsp+40H] + movdqa xmm14, xmmword ptr [rsp+50H] + movdqa xmm15, xmmword ptr [rsp+60H] + add rsp, 120 + ret +_blake3_compress_xof_sse41 ENDP +blake3_compress_xof_sse41 ENDP + +_TEXT ENDS + + +_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' +ALIGN 64 +BLAKE3_IV: + dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH + +ADD0: + dd 0, 1, 2, 3 + +ADD1: + dd 4 dup (4) + +BLAKE3_IV_0: + dd 4 dup (6A09E667H) + +BLAKE3_IV_1: + dd 4 dup (0BB67AE85H) + +BLAKE3_IV_2: + dd 4 dup (3C6EF372H) + +BLAKE3_IV_3: + dd 4 dup (0A54FF53AH) + +BLAKE3_BLOCK_LEN: + dd 4 dup (64) + +ROT16: + db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 + +ROT8: + db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 + +CMP_MSB_MASK: + dd 8 dup(80000000H) + +_RDATA ENDS +END + diff --git a/src/third_party/doctest.h b/src/third_party/doctest.h index acbe6cd..7712dd6 100644 --- a/src/third_party/doctest.h +++ b/src/third_party/doctest.h @@ -48,8 +48,8 @@ #define DOCTEST_VERSION_MAJOR 2 #define DOCTEST_VERSION_MINOR 4 -#define DOCTEST_VERSION_PATCH 1 -#define DOCTEST_VERSION_STR "2.4.1" +#define DOCTEST_VERSION_PATCH 4 +#define DOCTEST_VERSION_STR "2.4.4" #define DOCTEST_VERSION \ (DOCTEST_VERSION_MAJOR * 10000 + DOCTEST_VERSION_MINOR * 100 + DOCTEST_VERSION_PATCH) @@ -368,7 +368,7 @@ DOCTEST_MSVC_SUPPRESS_WARNING(26812) // Prefer 'enum class' over 'enum' #define 
DOCTEST_BREAK_INTO_DEBUGGER() raise(SIGTRAP) #endif #elif defined(DOCTEST_PLATFORM_MAC) -#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) +#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__i386) #define DOCTEST_BREAK_INTO_DEBUGGER() __asm__("int $3\n" : :) #else #define DOCTEST_BREAK_INTO_DEBUGGER() __asm__("brk #0"); @@ -747,6 +747,7 @@ struct ContextOptions //!OCLINT too many fields bool gnu_file_line; // if line numbers should be surrounded with :x: and not (x): bool no_path_in_filenames; // if the path to files should be removed from the output bool no_line_numbers; // if source code line numbers should be omitted from the output + bool no_debug_output; // no output in the debug console when a debugger is attached bool no_skipped_summary; // don't print "skipped" in the summary !!! UNDOCUMENTED !!! bool no_time_in_output; // omit any time/timestamps from output !!! UNDOCUMENTED !!! @@ -806,7 +807,7 @@ namespace detail { } // namespace has_insertion_operator_impl template<class T> - using has_insertion_operator = has_insertion_operator_impl::check<T>; + using has_insertion_operator = has_insertion_operator_impl::check<const T>; DOCTEST_INTERFACE void my_memcpy(void* dest, const void* src, unsigned num); @@ -1035,6 +1036,7 @@ namespace detail { template <typename L, typename R> String stringifyBinaryExpr(const DOCTEST_REF_WRAP(L) lhs, const char* op, const DOCTEST_REF_WRAP(R) rhs) { + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) return toString(lhs) + op + toString(rhs); } @@ -1122,6 +1124,7 @@ namespace detail { #define DOCTEST_COMPARISON_RETURN_TYPE bool #else // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING #define DOCTEST_COMPARISON_RETURN_TYPE typename enable_if<can_use_op<L>::value || can_use_op<R>::value, bool>::type + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) inline bool eq(const char* lhs, const char* rhs) { return String(lhs) == String(rhs); } inline bool ne(const char* lhs, const 
char* rhs) { return String(lhs) != String(rhs); } inline bool lt(const char* lhs, const char* rhs) { return String(lhs) < String(rhs); } @@ -1541,12 +1544,24 @@ namespace detail { MessageBuilder() = delete; ~MessageBuilder(); + // the preferred way of chaining parameters for stringification template <typename T> - MessageBuilder& operator<<(const T& in) { + MessageBuilder& operator,(const T& in) { toStream(m_stream, in); return *this; } + // kept here just for backwards-compatibility - the comma operator should be preferred now + template <typename T> + MessageBuilder& operator<<(const T& in) { return this->operator,(in); } + + // the `,` operator has the lowest operator precedence - if `<<` is used by the user then + // the `,` operator will be called last which is not what we want and thus the `*` operator + // is used first (has higher operator precedence compared to `<<`) so that we guarantee that + // an operator of the MessageBuilder class is called first before the rest of the parameters + template <typename T> + MessageBuilder& operator*(const T& in) { return this->operator,(in); } + bool log(); void react(); }; @@ -1962,38 +1977,38 @@ int registerReporter(const char* name, int priority, bool isReporter) { DOCTEST_GLOBAL_NO_WARNINGS_END() typedef int DOCTEST_ANONYMOUS(_DOCTEST_ANON_FOR_SEMICOLON_) // for logging -#define DOCTEST_INFO(expression) \ +#define DOCTEST_INFO(...) \ DOCTEST_INFO_IMPL(DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), \ - DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), expression) + DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), __VA_ARGS__) -#define DOCTEST_INFO_IMPL(lambda_name, mb_name, s_name, expression) \ +#define DOCTEST_INFO_IMPL(lambda_name, mb_name, s_name, ...) 
\ DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4626) \ auto lambda_name = [&](std::ostream* s_name) { \ doctest::detail::MessageBuilder mb_name(__FILE__, __LINE__, doctest::assertType::is_warn); \ mb_name.m_stream = s_name; \ - mb_name << expression; \ + mb_name * __VA_ARGS__; \ }; \ DOCTEST_MSVC_SUPPRESS_WARNING_POP \ auto DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_) = doctest::detail::MakeContextScope(lambda_name) -#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := " << x) +#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := ", x) -#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, x) \ +#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, ...) \ do { \ doctest::detail::MessageBuilder mb(file, line, doctest::assertType::type); \ - mb << x; \ + mb * __VA_ARGS__; \ DOCTEST_ASSERT_LOG_AND_REACT(mb); \ } while(false) // clang-format off -#define DOCTEST_ADD_MESSAGE_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_warn, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x) -#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_check, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x) -#define DOCTEST_ADD_FAIL_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_require, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x) +#define DOCTEST_ADD_MESSAGE_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_warn, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__) +#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_check, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__) +#define DOCTEST_ADD_FAIL_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_require, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__) // clang-format on -#define DOCTEST_MESSAGE(x) DOCTEST_ADD_MESSAGE_AT(__FILE__, __LINE__, x) -#define DOCTEST_FAIL_CHECK(x) DOCTEST_ADD_FAIL_CHECK_AT(__FILE__, __LINE__, x) -#define DOCTEST_FAIL(x) DOCTEST_ADD_FAIL_AT(__FILE__, __LINE__, x) +#define DOCTEST_MESSAGE(...) 
DOCTEST_ADD_MESSAGE_AT(__FILE__, __LINE__, __VA_ARGS__) +#define DOCTEST_FAIL_CHECK(...) DOCTEST_ADD_FAIL_CHECK_AT(__FILE__, __LINE__, __VA_ARGS__) +#define DOCTEST_FAIL(...) DOCTEST_ADD_FAIL_AT(__FILE__, __LINE__, __VA_ARGS__) #define DOCTEST_TO_LVALUE(...) __VA_ARGS__ // Not removed to keep backwards compatibility. @@ -2036,12 +2051,12 @@ int registerReporter(const char* name, int priority, bool isReporter) { #define DOCTEST_REQUIRE_FALSE(...) DOCTEST_ASSERT_IMPLEMENT_1(DT_REQUIRE_FALSE, __VA_ARGS__) // clang-format off -#define DOCTEST_WARN_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN, cond); } while(false) -#define DOCTEST_CHECK_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK, cond); } while(false) -#define DOCTEST_REQUIRE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE, cond); } while(false) -#define DOCTEST_WARN_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN_FALSE, cond); } while(false) -#define DOCTEST_CHECK_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK_FALSE, cond); } while(false) -#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE_FALSE, cond); } while(false) +#define DOCTEST_WARN_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN, cond); } while(false) +#define DOCTEST_CHECK_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK, cond); } while(false) +#define DOCTEST_REQUIRE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE, cond); } while(false) +#define DOCTEST_WARN_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN_FALSE, cond); } while(false) +#define DOCTEST_CHECK_FALSE_MESSAGE(cond, ...) 
do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK_FALSE, cond); } while(false) +#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE_FALSE, cond); } while(false) // clang-format on #define DOCTEST_ASSERT_THROWS_AS(expr, assert_type, message, ...) \ @@ -2051,8 +2066,8 @@ int registerReporter(const char* name, int priority, bool isReporter) { __LINE__, #expr, #__VA_ARGS__, message); \ try { \ DOCTEST_CAST_TO_VOID(expr) \ - } catch(const doctest::detail::remove_const< \ - doctest::detail::remove_reference<__VA_ARGS__>::type>::type&) { \ + } catch(const typename doctest::detail::remove_const< \ + typename doctest::detail::remove_reference<__VA_ARGS__>::type>::type&) { \ _DOCTEST_RB.translateException(); \ _DOCTEST_RB.m_threw_as = true; \ } catch(...) { _DOCTEST_RB.translateException(); } \ @@ -2103,21 +2118,21 @@ int registerReporter(const char* name, int priority, bool isReporter) { #define DOCTEST_CHECK_NOTHROW(...) DOCTEST_ASSERT_NOTHROW(DT_CHECK_NOTHROW, __VA_ARGS__) #define DOCTEST_REQUIRE_NOTHROW(...) 
DOCTEST_ASSERT_NOTHROW(DT_REQUIRE_NOTHROW, __VA_ARGS__) -#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS(expr); } while(false) -#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS(expr); } while(false) -#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS(expr); } while(false) -#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_AS(expr, ex); } while(false) -#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_AS(expr, ex); } while(false) -#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_AS(expr, ex); } while(false) -#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_WITH(expr, with); } while(false) -#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_WITH(expr, with); } while(false) -#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_WITH(expr, with); } while(false) -#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_WITH_AS(expr, with, ex); } while(false) -#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_WITH_AS(expr, with, ex); } while(false) -#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_WITH_AS(expr, with, ex); } while(false) -#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_NOTHROW(expr); } while(false) -#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_NOTHROW(expr); } while(false) -#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); 
DOCTEST_REQUIRE_NOTHROW(expr); } while(false) +#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS(expr); } while(false) +#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS(expr); } while(false) +#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS(expr); } while(false) +#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_AS(expr, ex); } while(false) +#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_AS(expr, ex); } while(false) +#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_AS(expr, ex); } while(false) +#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_WITH(expr, with); } while(false) +#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_WITH(expr, with); } while(false) +#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_WITH(expr, with); } while(false) +#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_WITH_AS(expr, with, ex); } while(false) +#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_WITH_AS(expr, with, ex); } while(false) +#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_WITH_AS(expr, with, ex); } while(false) +#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_NOTHROW(expr); } while(false) +#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) 
do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_NOTHROW(expr); } while(false) +#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_NOTHROW(expr); } while(false) // clang-format on #ifndef DOCTEST_CONFIG_SUPER_FAST_ASSERTS @@ -2230,21 +2245,21 @@ int registerReporter(const char* name, int priority, bool isReporter) { #define DOCTEST_CHECK_NOTHROW(...) (static_cast<void>(0)) #define DOCTEST_REQUIRE_NOTHROW(...) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) 
(static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0)) #else // DOCTEST_CONFIG_NO_EXCEPTIONS_BUT_WITH_ALL_ASSERTS @@ -2335,14 +2350,14 @@ int registerReporter(const char* name, int priority, bool isReporter) { #define DOCTEST_REGISTER_REPORTER(name, priority, reporter) #define DOCTEST_REGISTER_LISTENER(name, priority, reporter) -#define DOCTEST_INFO(x) (static_cast<void>(0)) +#define DOCTEST_INFO(...) (static_cast<void>(0)) #define DOCTEST_CAPTURE(x) (static_cast<void>(0)) -#define DOCTEST_ADD_MESSAGE_AT(file, line, x) (static_cast<void>(0)) -#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, x) (static_cast<void>(0)) -#define DOCTEST_ADD_FAIL_AT(file, line, x) (static_cast<void>(0)) -#define DOCTEST_MESSAGE(x) (static_cast<void>(0)) -#define DOCTEST_FAIL_CHECK(x) (static_cast<void>(0)) -#define DOCTEST_FAIL(x) (static_cast<void>(0)) +#define DOCTEST_ADD_MESSAGE_AT(file, line, ...) (static_cast<void>(0)) +#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, ...) (static_cast<void>(0)) +#define DOCTEST_ADD_FAIL_AT(file, line, ...) (static_cast<void>(0)) +#define DOCTEST_MESSAGE(...) 
(static_cast<void>(0)) +#define DOCTEST_FAIL_CHECK(...) (static_cast<void>(0)) +#define DOCTEST_FAIL(...) (static_cast<void>(0)) #define DOCTEST_WARN(...) (static_cast<void>(0)) #define DOCTEST_CHECK(...) (static_cast<void>(0)) @@ -2351,12 +2366,12 @@ int registerReporter(const char* name, int priority, bool isReporter) { #define DOCTEST_CHECK_FALSE(...) (static_cast<void>(0)) #define DOCTEST_REQUIRE_FALSE(...) (static_cast<void>(0)) -#define DOCTEST_WARN_MESSAGE(cond, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_MESSAGE(cond, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_MESSAGE(cond, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_FALSE_MESSAGE(cond, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_FALSE_MESSAGE(cond, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, msg) (static_cast<void>(0)) +#define DOCTEST_WARN_MESSAGE(cond, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_MESSAGE(cond, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_MESSAGE(cond, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_FALSE_MESSAGE(cond, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_FALSE_MESSAGE(cond, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, ...) (static_cast<void>(0)) #define DOCTEST_WARN_THROWS(...) (static_cast<void>(0)) #define DOCTEST_CHECK_THROWS(...) (static_cast<void>(0)) @@ -2374,21 +2389,21 @@ int registerReporter(const char* name, int priority, bool isReporter) { #define DOCTEST_CHECK_NOTHROW(...) (static_cast<void>(0)) #define DOCTEST_REQUIRE_NOTHROW(...) 
(static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0)) -#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0)) -#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) 
(static_cast<void>(0)) +#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0)) +#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0)) +#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0)) #define DOCTEST_WARN_EQ(...) (static_cast<void>(0)) #define DOCTEST_CHECK_EQ(...) (static_cast<void>(0)) @@ -2754,9 +2769,7 @@ DOCTEST_MAKE_STD_HEADERS_CLEAN_FROM_WARNINGS_ON_WALL_BEGIN #include <map> #include <exception> #include <stdexcept> -#ifdef DOCTEST_CONFIG_POSIX_SIGNALS #include <csignal> -#endif // DOCTEST_CONFIG_POSIX_SIGNALS #include <cfloat> #include <cctype> #include <cstdint> @@ -3071,6 +3084,7 @@ String::String() { String::~String() { if(!isOnStack()) delete[] data.ptr; + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) } String::String(const char* in) @@ -3112,6 +3126,7 @@ String& String::operator+=(const String& other) { if(total_size < len) { // append to the current stack space memcpy(buf + my_old_size, other.c_str(), other_size + 1); + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) setLast(last - total_size); } else { // alloc new chunk @@ -3153,6 +3168,7 @@ String& String::operator+=(const String& other) { return *this; } +// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) String String::operator+(const String& other) const { return String(*this) += other; } String::String(String&& other) { @@ -3307,6 +3323,7 @@ DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wnull-dereference") DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wnull-dereference") // depending on the current options this will remove the path of filenames const char* skipPathFromFilename(const char* file) { +#ifndef DOCTEST_CONFIG_DISABLE if(getContextOptions()->no_path_in_filenames) { auto back = std::strrchr(file, '\\'); auto forward = 
std::strrchr(file, '/'); @@ -3316,6 +3333,7 @@ const char* skipPathFromFilename(const char* file) { return forward + 1; } } +#endif // DOCTEST_CONFIG_DISABLE return file; } DOCTEST_CLANG_SUPPRESS_WARNING_POP @@ -3334,6 +3352,7 @@ IContextScope::~IContextScope() = default; #ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING String toString(char* in) { return toString(static_cast<const char*>(in)); } +// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) String toString(const char* in) { return String("\"") + (in ? in : "{null string}") + "\""; } #endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING String toString(bool in) { return in ? "true" : "false"; } @@ -3406,6 +3425,7 @@ bool operator>(double lhs, const Approx& rhs) { return lhs > rhs.m_value && lhs bool operator>(const Approx& lhs, double rhs) { return lhs.m_value > rhs && lhs != rhs; } String toString(const Approx& in) { + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) return String("Approx( ") + doctest::toString(in.m_value) + " )"; } const ContextOptions* getContextOptions() { return DOCTEST_BRANCH_ON_DISABLED(nullptr, g_cs); } @@ -3698,11 +3718,15 @@ namespace detail { } bool TestCase::operator<(const TestCase& other) const { + // this will be used only to differentiate between test cases - not relevant for sorting if(m_line != other.m_line) return m_line < other.m_line; const int file_cmp = m_file.compare(other.m_file); if(file_cmp != 0) return file_cmp < 0; + const int name_cmp = strcmp(m_name, other.m_name); + if(name_cmp != 0) + return name_cmp < 0; return m_template_id < other.m_template_id; } } // namespace detail @@ -4009,24 +4033,40 @@ namespace { // Windows can easily distinguish between SO and SigSegV, // but SigInt, SigTerm, etc are handled differently. 
SignalDefs signalDefs[] = { - {EXCEPTION_ILLEGAL_INSTRUCTION, "SIGILL - Illegal instruction signal"}, - {EXCEPTION_STACK_OVERFLOW, "SIGSEGV - Stack overflow"}, - {EXCEPTION_ACCESS_VIOLATION, "SIGSEGV - Segmentation violation signal"}, - {EXCEPTION_INT_DIVIDE_BY_ZERO, "Divide by zero error"}, + {static_cast<DWORD>(EXCEPTION_ILLEGAL_INSTRUCTION), + "SIGILL - Illegal instruction signal"}, + {static_cast<DWORD>(EXCEPTION_STACK_OVERFLOW), "SIGSEGV - Stack overflow"}, + {static_cast<DWORD>(EXCEPTION_ACCESS_VIOLATION), + "SIGSEGV - Segmentation violation signal"}, + {static_cast<DWORD>(EXCEPTION_INT_DIVIDE_BY_ZERO), "Divide by zero error"}, }; struct FatalConditionHandler { static LONG CALLBACK handleException(PEXCEPTION_POINTERS ExceptionInfo) { - for(size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { - if(ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) { - reportFatal(signalDefs[i].name); - break; + // Multiple threads may enter this filter/handler at once. We want the error message to be printed on the + // console just once no matter how many threads have crashed. + static std::mutex mutex; + static bool execute = true; + { + std::lock_guard<std::mutex> lock(mutex); + if(execute) { + bool reported = false; + for(size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { + if(ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) { + reportFatal(signalDefs[i].name); + reported = true; + break; + } + } + if(reported == false) + reportFatal("Unhandled SEH exception caught"); + if(isDebuggerActive() && !g_cs->no_breaks) + DOCTEST_BREAK_INTO_DEBUGGER(); } + execute = false; } - // If its not an exception we care about, pass it along. - // This stops us from eating debugger breaks etc. 
- return EXCEPTION_CONTINUE_SEARCH; + std::exit(EXIT_FAILURE); } FatalConditionHandler() { @@ -4038,6 +4078,51 @@ namespace { previousTop = SetUnhandledExceptionFilter(handleException); // Pass in guarantee size to be filled SetThreadStackGuarantee(&guaranteeSize); + + // On Windows uncaught exceptions from another thread, exceptions from + // destructors, or calls to std::terminate are not a SEH exception + + // The terminal handler gets called when: + // - std::terminate is called FROM THE TEST RUNNER THREAD + // - an exception is thrown from a destructor FROM THE TEST RUNNER THREAD + original_terminate_handler = std::get_terminate(); + std::set_terminate([]() noexcept { + reportFatal("Terminate handler called"); + if(isDebuggerActive() && !g_cs->no_breaks) + DOCTEST_BREAK_INTO_DEBUGGER(); + std::exit(EXIT_FAILURE); // explicitly exit - otherwise the SIGABRT handler may be called as well + }); + + // SIGABRT is raised when: + // - std::terminate is called FROM A DIFFERENT THREAD + // - an exception is thrown from a destructor FROM A DIFFERENT THREAD + // - an uncaught exception is thrown FROM A DIFFERENT THREAD + prev_sigabrt_handler = std::signal(SIGABRT, [](int signal) noexcept { + if(signal == SIGABRT) { + reportFatal("SIGABRT - Abort (abnormal termination) signal"); + if(isDebuggerActive() && !g_cs->no_breaks) + DOCTEST_BREAK_INTO_DEBUGGER(); + std::exit(EXIT_FAILURE); + } + }); + + // The following settings are taken from google test, and more + // specifically from UnitTest::Run() inside of gtest.cc + + // the user does not want to see pop-up dialogs about crashes + prev_error_mode_1 = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT | + SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); + // This forces the abort message to go to stderr in all circumstances. 
+ prev_error_mode_2 = _set_error_mode(_OUT_TO_STDERR); + // In the debug version, Visual Studio pops up a separate dialog + // offering a choice to debug the aborted program - we want to disable that. + prev_abort_behavior = _set_abort_behavior(0x0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); + // In debug mode, the Windows CRT can crash with an assertion over invalid + // input (e.g. passing an invalid file descriptor). The default handling + // for these assertions is to pop up a dialog and wait for user input. + // Instead ask the CRT to dump such assertions to stderr non-interactively. + prev_report_mode = _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG); + prev_report_file = _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); } static void reset() { @@ -4045,7 +4130,13 @@ namespace { // Unregister handler and restore the old guarantee SetUnhandledExceptionFilter(previousTop); SetThreadStackGuarantee(&guaranteeSize); - previousTop = nullptr; + std::set_terminate(original_terminate_handler); + std::signal(SIGABRT, prev_sigabrt_handler); + SetErrorMode(prev_error_mode_1); + _set_error_mode(prev_error_mode_2); + _set_abort_behavior(prev_abort_behavior, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); + _CrtSetReportMode(_CRT_ASSERT, prev_report_mode); + _CrtSetReportFile(_CRT_ASSERT, prev_report_file); isSet = false; } } @@ -4053,11 +4144,25 @@ namespace { ~FatalConditionHandler() { reset(); } private: + static UINT prev_error_mode_1; + static int prev_error_mode_2; + static unsigned int prev_abort_behavior; + static int prev_report_mode; + static _HFILE prev_report_file; + static void (*prev_sigabrt_handler)(int); + static std::terminate_handler original_terminate_handler; static bool isSet; static ULONG guaranteeSize; static LPTOP_LEVEL_EXCEPTION_FILTER previousTop; }; + UINT FatalConditionHandler::prev_error_mode_1; + int FatalConditionHandler::prev_error_mode_2; + unsigned int FatalConditionHandler::prev_abort_behavior; + int 
FatalConditionHandler::prev_report_mode; + _HFILE FatalConditionHandler::prev_report_file; + void (*FatalConditionHandler::prev_sigabrt_handler)(int); + std::terminate_handler FatalConditionHandler::original_terminate_handler; bool FatalConditionHandler::isSet = false; ULONG FatalConditionHandler::guaranteeSize = 0; LPTOP_LEVEL_EXCEPTION_FILTER FatalConditionHandler::previousTop = nullptr; @@ -4257,6 +4362,7 @@ namespace detail { // ################################################################################### DOCTEST_ASSERT_OUT_OF_TESTS(result.m_decomp); DOCTEST_ASSERT_IN_TESTS(result.m_decomp); + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) } MessageBuilder::MessageBuilder(const char* file, int line, assertType::Enum severity) { @@ -4979,7 +5085,6 @@ namespace { } // TODO: - // - log_contexts() // - log_message() // - respond to queries // - honor remaining options @@ -4993,7 +5098,6 @@ namespace { struct JUnitTestCaseData { -DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") // gmtime static std::string getCurrentTimestamp() { // Beware, this is not reentrant because of backward compatibility issues // Also, UTC only, again because of backward compatibility (%z is C++11) @@ -5001,16 +5105,19 @@ DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") // gmtime std::time(&rawtime); auto const timeStampSize = sizeof("2017-01-16T17:06:45Z"); - std::tm* timeInfo; - timeInfo = std::gmtime(&rawtime); + std::tm timeInfo; +#ifdef DOCTEST_PLATFORM_WINDOWS + gmtime_s(&timeInfo, &rawtime); +#else // DOCTEST_PLATFORM_WINDOWS + gmtime_r(&rawtime, &timeInfo); +#endif // DOCTEST_PLATFORM_WINDOWS char timeStamp[timeStampSize]; const char* const fmt = "%Y-%m-%dT%H:%M:%SZ"; - std::strftime(timeStamp, timeStampSize, fmt, timeInfo); + std::strftime(timeStamp, timeStampSize, fmt, &timeInfo); return std::string(timeStamp); } -DOCTEST_CLANG_SUPPRESS_WARNING_POP struct JUnitTestMessage { @@ -5175,12 +5282,27 @@ 
DOCTEST_CLANG_SUPPRESS_WARNING_POP << line(rb.m_line) << (opt.gnu_file_line ? ":" : "):") << std::endl; fulltext_log_assert_to_stream(os, rb); + log_contexts(os); testCaseData.addFailure(rb.m_decomp.c_str(), assertString(rb.m_at), os.str()); } void log_message(const MessageData&) override {} void test_case_skipped(const TestCaseData&) override {} + + void log_contexts(std::ostringstream& s) { + int num_contexts = get_num_active_contexts(); + if(num_contexts) { + auto contexts = get_active_contexts(); + + s << " logged: "; + for(int i = 0; i < num_contexts; ++i) { + s << (i == 0 ? "" : " "); + contexts[i]->stringify(&s); + s << std::endl; + } + } + } }; DOCTEST_REGISTER_REPORTER("junit", 0, JUnitReporter); @@ -5894,6 +6016,7 @@ void Context::parseArgs(int argc, const char* const* argv, bool withDefaults) { DOCTEST_PARSE_AS_BOOL_OR_FLAG("gnu-file-line", "gfl", gnu_file_line, !bool(DOCTEST_MSVC)); DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-path-filenames", "npf", no_path_in_filenames, false); DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-line-numbers", "nln", no_line_numbers, false); + DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-debug-output", "ndo", no_debug_output, false); DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-skipped-summary", "nss", no_skipped_summary, false); DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-time-in-output", "ntio", no_time_in_output, false); // clang-format on @@ -5951,6 +6074,7 @@ void Context::clearFilters() { // allows the user to override procedurally the int/bool options from the command line void Context::setOption(const char* option, int value) { setOption(option, toString(value).c_str()); + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) } // allows the user to override procedurally the string options from the command line @@ -6026,7 +6150,7 @@ int Context::run() { p->reporters_currently_used.insert(p->reporters_currently_used.begin(), curr.second(*g_cs)); #ifdef DOCTEST_PLATFORM_WINDOWS - if(isDebuggerActive()) + if(isDebuggerActive() && p->no_debug_output == false) 
p->reporters_currently_used.push_back(new DebugOutputWindowReporter(*g_cs)); #endif // DOCTEST_PLATFORM_WINDOWS diff --git a/src/third_party/fmt/core.h b/src/third_party/fmt/core.h index 031bf86..0a81e0c 100644 --- a/src/third_party/fmt/core.h +++ b/src/third_party/fmt/core.h @@ -18,7 +18,7 @@ #include <vector> // The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 70003 +#define FMT_VERSION 70103 #ifdef __clang__ # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) @@ -57,6 +57,7 @@ # define FMT_MSC_VER 0 # define FMT_SUPPRESS_MSC_WARNING(n) #endif + #ifdef __has_feature # define FMT_HAS_FEATURE(x) __has_feature(x) #else @@ -64,7 +65,7 @@ #endif #if defined(__has_include) && !defined(__INTELLISENSE__) && \ - !(FMT_ICC_VERSION && FMT_ICC_VERSION < 1600) + (!FMT_ICC_VERSION || FMT_ICC_VERSION >= 1600) # define FMT_HAS_INCLUDE(x) __has_include(x) #else # define FMT_HAS_INCLUDE(x) 0 @@ -152,7 +153,7 @@ # if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 # define FMT_DEPRECATED [[deprecated]] # else -# if defined(__GNUC__) || defined(__clang__) +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) # define FMT_DEPRECATED __attribute__((deprecated)) # elif FMT_MSC_VER # define FMT_DEPRECATED __declspec(deprecated) @@ -177,9 +178,17 @@ # endif #endif -#ifndef FMT_BEGIN_NAMESPACE +#ifndef FMT_USE_INLINE_NAMESPACES # if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ - FMT_MSC_VER >= 1900 + (FMT_MSC_VER >= 1900 && !_MANAGED) +# define FMT_USE_INLINE_NAMESPACES 1 +# else +# define FMT_USE_INLINE_NAMESPACES 0 +# endif +#endif + +#ifndef FMT_BEGIN_NAMESPACE +# if FMT_USE_INLINE_NAMESPACES # define FMT_INLINE_NAMESPACE inline namespace # define FMT_END_NAMESPACE \ } \ @@ -269,8 +278,7 @@ struct monostate {}; namespace detail { -// A helper function to suppress bogus "conditional expression is constant" -// warnings. 
+// A helper function to suppress "conditional expression is constant" warnings. template <typename T> constexpr T const_check(T value) { return value; } FMT_NORETURN FMT_API void assert_fail(const char* file, int line, @@ -299,7 +307,8 @@ template <typename T> struct std_string_view {}; #ifdef FMT_USE_INT128 // Do nothing. -#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && !(FMT_CLANG_VERSION && FMT_MSC_VER) +#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && \ + !(FMT_CLANG_VERSION && FMT_MSC_VER) # define FMT_USE_INT128 1 using int128_t = __int128_t; using uint128_t = __uint128_t; @@ -506,6 +515,18 @@ template <typename S> struct char_t_impl<S, enable_if_t<is_string<S>::value>> { using type = typename result::value_type; }; +// Reports a compile-time error if S is not a valid format string. +template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)> +FMT_INLINE void check_format_string(const S&) { +#ifdef FMT_ENFORCE_COMPILE_STRING + static_assert(is_compile_string<S>::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " + "FMT_STRING."); +#endif +} +template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)> +void check_format_string(S); + struct error_handler { constexpr error_handler() = default; constexpr error_handler(const error_handler&) = default; @@ -545,8 +566,9 @@ class basic_format_parse_context : private ErrorHandler { using iterator = typename basic_string_view<Char>::iterator; explicit constexpr basic_format_parse_context( - basic_string_view<Char> format_str, ErrorHandler eh = {}) - : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {} + basic_string_view<Char> format_str, ErrorHandler eh = {}, + int next_arg_id = 0) + : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {} /** Returns an iterator to the beginning of the format string range being @@ -616,8 +638,24 @@ template <typename T, typename Context> using has_formatter = std::is_constructible<typename 
Context::template formatter_type<T>>; +// Checks whether T is a container with contiguous storage. +template <typename T> struct is_contiguous : std::false_type {}; +template <typename Char> +struct is_contiguous<std::basic_string<Char>> : std::true_type {}; + namespace detail { +// Extracts a reference to the container from back_insert_iterator. +template <typename Container> +inline Container& get_container(std::back_insert_iterator<Container> it) { + using bi_iterator = std::back_insert_iterator<Container>; + struct accessor : bi_iterator { + accessor(bi_iterator iter) : bi_iterator(iter) {} + using bi_iterator::container; + }; + return *accessor(it).container; +} + /** \rst A contiguous memory buffer with an optional growing ability. It is an internal @@ -640,6 +678,8 @@ template <typename T> class buffer { size_(sz), capacity_(cap) {} + ~buffer() = default; + /** Sets the buffer data and capacity. */ void set(T* buf_data, size_t buf_capacity) FMT_NOEXCEPT { ptr_ = buf_data; @@ -655,7 +695,6 @@ template <typename T> class buffer { buffer(const buffer&) = delete; void operator=(const buffer&) = delete; - virtual ~buffer() = default; T* begin() FMT_NOEXCEPT { return ptr_; } T* end() FMT_NOEXCEPT { return ptr_ + size_; } @@ -675,24 +714,26 @@ template <typename T> class buffer { /** Returns a pointer to the buffer data. */ const T* data() const FMT_NOEXCEPT { return ptr_; } - /** - Resizes the buffer. If T is a POD type new elements may not be initialized. - */ - void resize(size_t new_size) { - reserve(new_size); - size_ = new_size; - } - /** Clears this buffer. */ void clear() { size_ = 0; } - /** Reserves space to store at least *capacity* elements. */ - void reserve(size_t new_capacity) { + // Tries resizing the buffer to contain *count* elements. If T is a POD type + // the new elements may not be initialized. + void try_resize(size_t count) { + try_reserve(count); + size_ = count <= capacity_ ? 
count : capacity_; + } + + // Tries increasing the buffer capacity to *new_capacity*. It can increase the + // capacity by a smaller amount than requested but guarantees there is space + // for at least one additional element either by increasing the capacity or by + // flushing the buffer if it is full. + void try_reserve(size_t new_capacity) { if (new_capacity > capacity_) grow(new_capacity); } void push_back(const T& value) { - reserve(size_ + 1); + try_reserve(size_ + 1); ptr_[size_++] = value; } @@ -705,32 +746,150 @@ template <typename T> class buffer { } }; -// A container-backed buffer. +struct buffer_traits { + explicit buffer_traits(size_t) {} + size_t count() const { return 0; } + size_t limit(size_t size) { return size; } +}; + +class fixed_buffer_traits { + private: + size_t count_ = 0; + size_t limit_; + + public: + explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} + size_t count() const { return count_; } + size_t limit(size_t size) { + size_t n = limit_ > count_ ? limit_ - count_ : 0; + count_ += size; + return size < n ? size : n; + } +}; + +// A buffer that writes to an output iterator when flushed. 
+template <typename OutputIt, typename T, typename Traits = buffer_traits> +class iterator_buffer final : public Traits, public buffer<T> { + private: + OutputIt out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + protected: + void grow(size_t) final FMT_OVERRIDE { + if (this->size() == buffer_size) flush(); + } + void flush(); + + public: + explicit iterator_buffer(OutputIt out, size_t n = buffer_size) + : Traits(n), + buffer<T>(data_, 0, buffer_size), + out_(out) {} + ~iterator_buffer() { flush(); } + + OutputIt out() { + flush(); + return out_; + } + size_t count() const { return Traits::count() + this->size(); } +}; + +template <typename T> class iterator_buffer<T*, T> final : public buffer<T> { + protected: + void grow(size_t) final FMT_OVERRIDE {} + + public: + explicit iterator_buffer(T* out, size_t = 0) : buffer<T>(out, 0, ~size_t()) {} + + T* out() { return &*this->end(); } +}; + +// A buffer that writes to a container with the contiguous storage. template <typename Container> -class container_buffer : public buffer<typename Container::value_type> { +class iterator_buffer<std::back_insert_iterator<Container>, + enable_if_t<is_contiguous<Container>::value, + typename Container::value_type>> + final : public buffer<typename Container::value_type> { private: Container& container_; protected: - void grow(size_t capacity) FMT_OVERRIDE { + void grow(size_t capacity) final FMT_OVERRIDE { container_.resize(capacity); this->set(&container_[0], capacity); } public: - explicit container_buffer(Container& c) + explicit iterator_buffer(Container& c) : buffer<typename Container::value_type>(c.size()), container_(c) {} + explicit iterator_buffer(std::back_insert_iterator<Container> out, size_t = 0) + : iterator_buffer(get_container(out)) {} + std::back_insert_iterator<Container> out() { + return std::back_inserter(container_); + } }; -// Extracts a reference to the container from back_insert_iterator. 
-template <typename Container> -inline Container& get_container(std::back_insert_iterator<Container> it) { - using bi_iterator = std::back_insert_iterator<Container>; - struct accessor : bi_iterator { - accessor(bi_iterator iter) : bi_iterator(iter) {} - using bi_iterator::container; - }; - return *accessor(it).container; +// A buffer that counts the number of code units written discarding the output. +template <typename T = char> class counting_buffer final : public buffer<T> { + private: + enum { buffer_size = 256 }; + T data_[buffer_size]; + size_t count_ = 0; + + protected: + void grow(size_t) final FMT_OVERRIDE { + if (this->size() != buffer_size) return; + count_ += this->size(); + this->clear(); + } + + public: + counting_buffer() : buffer<T>(data_, 0, buffer_size) {} + + size_t count() { return count_ + this->size(); } +}; + +// An output iterator that appends to the buffer. +// It is used to reduce symbol sizes for the common case. +template <typename T> +class buffer_appender : public std::back_insert_iterator<buffer<T>> { + using base = std::back_insert_iterator<buffer<T>>; + + public: + explicit buffer_appender(buffer<T>& buf) : base(buf) {} + buffer_appender(base it) : base(it) {} + + buffer_appender& operator++() { + base::operator++(); + return *this; + } + + buffer_appender operator++(int) { + buffer_appender tmp = *this; + ++*this; + return tmp; + } +}; + +// Maps an output iterator into a buffer. 
+template <typename T, typename OutputIt> +iterator_buffer<OutputIt, T> get_buffer(OutputIt); +template <typename T> buffer<T>& get_buffer(buffer_appender<T>); + +template <typename OutputIt> OutputIt get_buffer_init(OutputIt out) { + return out; +} +template <typename T> buffer<T>& get_buffer_init(buffer_appender<T> out) { + return get_container(out); +} + +template <typename Buffer> +auto get_iterator(Buffer& buf) -> decltype(buf.out()) { + return buf.out(); +} +template <typename T> buffer_appender<T> get_iterator(buffer<T>& buf) { + return buffer_appender<T>(buf); } template <typename T, typename Char = char, typename Enable = void> @@ -759,7 +918,8 @@ template <typename Char> struct named_arg_info { template <typename T, typename Char, size_t NUM_ARGS, size_t NUM_NAMED_ARGS> struct arg_data { // args_[0].named_args points to named_args_ to avoid bloating format_args. - T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : 1)]; + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)]; named_arg_info<Char> named_args_[NUM_NAMED_ARGS]; template <typename... U> @@ -771,7 +931,8 @@ struct arg_data { template <typename T, typename Char, size_t NUM_ARGS> struct arg_data<T, Char, NUM_ARGS, 0> { - T args_[NUM_ARGS != 0 ? NUM_ARGS : 1]; + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; template <typename... U> FMT_INLINE arg_data(const U&... init) : args_{init...} {} @@ -959,6 +1120,8 @@ enum { long_short = sizeof(long) == sizeof(int) }; using long_type = conditional_t<long_short, int, long long>; using ulong_type = conditional_t<long_short, unsigned, unsigned long long>; +struct unformattable {}; + // Maps formatting arguments to core types. template <typename Context> struct arg_mapper { using char_type = typename Context::char_type; @@ -1067,15 +1230,7 @@ template <typename Context> struct arg_mapper { return map(val.value); } - int map(...) 
{ - constexpr bool formattable = sizeof(Context) == 0; - static_assert( - formattable, - "Cannot format argument. To make type T formattable provide a " - "formatter<T> specialization: " - "https://fmt.dev/latest/api.html#formatting-user-defined-types"); - return 0; - } + unformattable map(...) { return {}; } }; // A type constant after applying arg_mapper<Context>. @@ -1199,15 +1354,25 @@ FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg( return vis(monostate()); } -// Checks whether T is a container with contiguous storage. -template <typename T> struct is_contiguous : std::false_type {}; -template <typename Char> -struct is_contiguous<std::basic_string<Char>> : std::true_type {}; -template <typename Char> -struct is_contiguous<detail::buffer<Char>> : std::true_type {}; +template <typename T> struct formattable : std::false_type {}; namespace detail { +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template <typename... Ts> struct void_t_impl { using type = void; }; +template <typename... Ts> +using void_t = typename detail::void_t_impl<Ts...>::type; + +template <typename It, typename T, typename Enable = void> +struct is_output_iterator : std::false_type {}; + +template <typename It, typename T> +struct is_output_iterator< + It, T, + void_t<typename std::iterator_traits<It>::iterator_category, + decltype(*std::declval<It>() = std::declval<T>())>> + : std::true_type {}; + template <typename OutputIt> struct is_back_insert_iterator : std::false_type {}; template <typename Container> @@ -1219,6 +1384,9 @@ struct is_contiguous_back_insert_iterator : std::false_type {}; template <typename Container> struct is_contiguous_back_insert_iterator<std::back_insert_iterator<Container>> : is_contiguous<Container> {}; +template <typename Char> +struct is_contiguous_back_insert_iterator<buffer_appender<Char>> + : std::true_type {}; // A type-erased reference to an std::locale to avoid heavy <locale> include. 
class locale_ref { @@ -1250,13 +1418,24 @@ FMT_CONSTEXPR basic_format_arg<Context> make_arg(const T& value) { return arg; } +template <typename T> int check(unformattable) { + static_assert( + formattable<T>(), + "Cannot format an argument. To make type T formattable provide a " + "formatter<T> specialization: https://fmt.dev/latest/api.html#udt"); + return 0; +} +template <typename T, typename U> inline const U& check(const U& val) { + return val; +} + // The type template parameter is there to avoid an ODR violation when using // a fallback formatter in one translation unit and an implicit conversion in // another (not recommended). template <bool IS_PACKED, typename Context, type, typename T, FMT_ENABLE_IF(IS_PACKED)> inline value<Context> make_arg(const T& val) { - return arg_mapper<Context>().map(val); + return check<T>(arg_mapper<Context>().map(val)); } template <bool IS_PACKED, typename Context, type, typename T, @@ -1356,13 +1535,13 @@ template <typename OutputIt, typename Char> class basic_format_context { template <typename Char> using buffer_context = - basic_format_context<std::back_insert_iterator<detail::buffer<Char>>, Char>; + basic_format_context<detail::buffer_appender<Char>, Char>; using format_context = buffer_context<char>; using wformat_context = buffer_context<wchar_t>; -// Workaround a bug in gcc: https://stackoverflow.com/q/62767544/471164. +// Workaround an alias issue: https://stackoverflow.com/q/62767544/471164. #define FMT_BUFFER_CONTEXT(Char) \ - basic_format_context<std::back_insert_iterator<detail::buffer<Char>>, Char> + basic_format_context<detail::buffer_appender<Char>, Char> /** \rst @@ -1414,7 +1593,7 @@ class format_arg_store /** \rst - Constructs an `~fmt::format_arg_store` object that contains references to + Constructs a `~fmt::format_arg_store` object that contains references to arguments and can be implicitly converted to `~fmt::format_args`. `Context` can be omitted in which case it defaults to `~fmt::context`. 
See `~fmt::arg` for lifetime considerations. @@ -1428,6 +1607,27 @@ inline format_arg_store<Context, Args...> make_format_args( /** \rst + Constructs a `~fmt::format_arg_store` object that contains references + to arguments and can be implicitly converted to `~fmt::format_args`. + If ``format_str`` is a compile-time string then `make_args_checked` checks + its validity at compile time. + \endrst + */ +template <typename... Args, typename S, typename Char = char_t<S>> +inline auto make_args_checked(const S& format_str, + const remove_reference_t<Args>&... args) + -> format_arg_store<buffer_context<Char>, remove_reference_t<Args>...> { + static_assert( + detail::count<( + std::is_base_of<detail::view, remove_reference_t<Args>>::value && + std::is_reference<Args>::value)...>() == 0, + "passing views as lvalues is disallowed"); + detail::check_format_string<Args...>(format_str); + return {args...}; +} + +/** + \rst Returns a named argument to be used in a formatting function. It should only be used in a call to a formatting function. @@ -1729,7 +1929,14 @@ template <typename Context> class basic_format_args { } }; -/** An alias to ``basic_format_args<context>``. */ +#ifdef FMT_ARM_ABI_COMPATIBILITY +/** An alias to ``basic_format_args<format_context>``. */ +// Separate types would result in shorter symbols but break ABI compatibility +// between clang and gcc on ARM (#1919). +using format_args = basic_format_args<format_context>; +using wformat_args = basic_format_args<wformat_context>; +#else +// DEPRECATED! These are kept for ABI compatibility. // It is a separate type rather than an alias to make symbols readable. struct format_args : basic_format_args<format_context> { template <typename... 
Args> @@ -1738,31 +1945,9 @@ struct format_args : basic_format_args<format_context> { struct wformat_args : basic_format_args<wformat_context> { using basic_format_args::basic_format_args; }; - -namespace detail { - -// Reports a compile-time error if S is not a valid format string. -template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)> -FMT_INLINE void check_format_string(const S&) { -#ifdef FMT_ENFORCE_COMPILE_STRING - static_assert(is_compile_string<S>::value, - "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " - "FMT_STRING."); #endif -} -template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)> -void check_format_string(S); -template <typename... Args, typename S, typename Char = char_t<S>> -inline format_arg_store<buffer_context<Char>, remove_reference_t<Args>...> -make_args_checked(const S& format_str, - const remove_reference_t<Args>&... args) { - static_assert(count<(std::is_base_of<view, remove_reference_t<Args>>::value && - std::is_reference<Args>::value)...>() == 0, - "passing views as lvalues is disallowed"); - check_format_string<Args...>(format_str); - return {args...}; -} +namespace detail { template <typename Char, FMT_ENABLE_IF(!std::is_same<Char, char>::value)> std::basic_string<Char> vformat( @@ -1772,9 +1957,10 @@ std::basic_string<Char> vformat( FMT_API std::string vformat(string_view format_str, format_args args); template <typename Char> -typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to( +void vformat_to( buffer<Char>& buf, basic_string_view<Char> format_str, - basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args); + basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args, + detail::locale_ref loc = {}); template <typename Char, typename Args, FMT_ENABLE_IF(!std::is_same<Char, char>::value)> @@ -1789,26 +1975,80 @@ inline void vprint_mojibake(std::FILE*, string_view, format_args) {} /** Formats a string and writes the output to ``out``. 
*/ // GCC 8 and earlier cannot handle std::back_insert_iterator<Container> with // vformat_to<ArgFormatter>(...) overload, so SFINAE on iterator type instead. -template < - typename OutputIt, typename S, typename Char = char_t<S>, - FMT_ENABLE_IF(detail::is_contiguous_back_insert_iterator<OutputIt>::value)> -OutputIt vformat_to( - OutputIt out, const S& format_str, - basic_format_args<buffer_context<type_identity_t<Char>>> args) { - auto& c = detail::get_container(out); - detail::container_buffer<remove_reference_t<decltype(c)>> buf(c); +template <typename OutputIt, typename S, typename Char = char_t<S>, + bool enable = detail::is_output_iterator<OutputIt, Char>::value> +auto vformat_to(OutputIt out, const S& format_str, + basic_format_args<buffer_context<type_identity_t<Char>>> args) + -> typename std::enable_if<enable, OutputIt>::type { + decltype(detail::get_buffer<Char>(out)) buf(detail::get_buffer_init(out)); detail::vformat_to(buf, to_string_view(format_str), args); - return out; + return detail::get_iterator(buf); +} + +/** + \rst + Formats arguments, writes the result to the output iterator ``out`` and returns + the iterator past the end of the output range. + + **Example**:: + + std::vector<char> out; + fmt::format_to(std::back_inserter(out), "{}", 42); + \endrst + */ +// We cannot use FMT_ENABLE_IF because of a bug in gcc 8.3. +template <typename OutputIt, typename S, typename... Args, + bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value> +inline auto format_to(OutputIt out, const S& format_str, Args&&... args) -> + typename std::enable_if<enable, OutputIt>::type { + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); + return vformat_to(out, to_string_view(format_str), vargs); +} + +template <typename OutputIt> struct format_to_n_result { + /** Iterator past the end of the output range. */ + OutputIt out; + /** Total (not truncated) output size. 
*/ + size_t size; +}; + +template <typename OutputIt, typename Char, typename... Args, + FMT_ENABLE_IF(detail::is_output_iterator<OutputIt, Char>::value)> +inline format_to_n_result<OutputIt> vformat_to_n( + OutputIt out, size_t n, basic_string_view<Char> format_str, + basic_format_args<buffer_context<type_identity_t<Char>>> args) { + detail::iterator_buffer<OutputIt, Char, detail::fixed_buffer_traits> buf(out, + n); + detail::vformat_to(buf, format_str, args); + return {buf.out(), buf.count()}; +} + +/** + \rst + Formats arguments, writes up to ``n`` characters of the result to the output + iterator ``out`` and returns the total output size and the iterator past the + end of the output range. + \endrst + */ +template <typename OutputIt, typename S, typename... Args, + bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value> +inline auto format_to_n(OutputIt out, size_t n, const S& format_str, + const Args&... args) -> + typename std::enable_if<enable, format_to_n_result<OutputIt>>::type { + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); + return vformat_to_n(out, n, to_string_view(format_str), vargs); } -template <typename Container, typename S, typename... Args, - FMT_ENABLE_IF( - is_contiguous<Container>::value&& detail::is_string<S>::value)> -inline std::back_insert_iterator<Container> format_to( - std::back_insert_iterator<Container> out, const S& format_str, - Args&&... args) { - return vformat_to(out, to_string_view(format_str), - detail::make_args_checked<Args...>(format_str, args...)); +/** + Returns the number of characters in the output of + ``format(format_str, args...)``. + */ +template <typename... Args> +inline size_t formatted_size(string_view format_str, Args&&... 
args) { + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); + detail::counting_buffer<> buf; + detail::vformat_to(buf, format_str, vargs); + return buf.count(); } template <typename S, typename Char = char_t<S>> @@ -1832,7 +2072,7 @@ FMT_INLINE std::basic_string<Char> vformat( // std::basic_string<char_t<S>> to reduce the symbol size. template <typename S, typename... Args, typename Char = char_t<S>> FMT_INLINE std::basic_string<Char> format(const S& format_str, Args&&... args) { - const auto& vargs = detail::make_args_checked<Args...>(format_str, args...); + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); return detail::vformat(to_string_view(format_str), vargs); } @@ -1852,7 +2092,7 @@ FMT_API void vprint(std::FILE*, string_view, format_args); */ template <typename S, typename... Args, typename Char = char_t<S>> inline void print(std::FILE* f, const S& format_str, Args&&... args) { - const auto& vargs = detail::make_args_checked<Args...>(format_str, args...); + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); return detail::is_unicode<Char>() ? vprint(f, to_string_view(format_str), vargs) : detail::vprint_mojibake(f, to_string_view(format_str), vargs); @@ -1871,7 +2111,7 @@ inline void print(std::FILE* f, const S& format_str, Args&&... args) { */ template <typename S, typename... Args, typename Char = char_t<S>> inline void print(const S& format_str, Args&&... args) { - const auto& vargs = detail::make_args_checked<Args...>(format_str, args...); + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); return detail::is_unicode<Char>() ? 
vprint(to_string_view(format_str), vargs) : detail::vprint_mojibake(stdout, to_string_view(format_str), diff --git a/src/third_party/fmt/format-inl.h b/src/third_party/fmt/format-inl.h index d8c9c8a..8f2fe73 100644 --- a/src/third_party/fmt/format-inl.h +++ b/src/third_party/fmt/format-inl.h @@ -13,32 +13,19 @@ #include <climits> #include <cmath> #include <cstdarg> -#include <cstring> // for std::memmove +#include <cstring> // std::memmove #include <cwchar> #include <exception> -#include "format.h" -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR # include <locale> #endif #ifdef _WIN32 -# if !defined(NOMINMAX) && !defined(WIN32_LEAN_AND_MEAN) -# define NOMINMAX -# define WIN32_LEAN_AND_MEAN -# include <windows.h> -# undef WIN32_LEAN_AND_MEAN -# undef NOMINMAX -# else -# include <windows.h> -# endif -# include <io.h> +# include <io.h> // _isatty #endif -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable : 4702) // unreachable code -#endif +#include "format.h" // Dummy implementations of strerror_r and strerror_s called if corresponding // system functions are not available. @@ -79,8 +66,8 @@ inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) { // ERANGE - buffer is not large enough to store the error message // other - failure // Buffer should be at least of size 1. -FMT_FUNC int safe_strerror(int error_code, char*& buffer, - size_t buffer_size) FMT_NOEXCEPT { +inline int safe_strerror(int error_code, char*& buffer, + size_t buffer_size) FMT_NOEXCEPT { FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer"); class dispatcher { @@ -145,7 +132,7 @@ FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code, // Report error code making sure that the output fits into // inline_buffer_size to avoid dynamic memory allocation and potential // bad_alloc. 
- out.resize(0); + out.try_resize(0); static const char SEP[] = ": "; static const char ERROR_STR[] = "error "; // Subtract 2 to account for terminating null characters in SEP and ERROR_STR. @@ -156,7 +143,7 @@ FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code, ++error_code_size; } error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); - auto it = std::back_inserter(out); + auto it = buffer_appender<char>(out); if (message.size() <= inline_buffer_size - error_code_size) format_to(it, "{}{}", message, SEP); format_to(it, "{}{}", ERROR_STR, error_code); @@ -173,8 +160,8 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on error. -FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { +inline void fwrite_fully(const void* ptr, size_t size, size_t count, + FILE* stream) { size_t written = std::fwrite(ptr, size, count, stream); if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); } @@ -242,26 +229,23 @@ template <> FMT_FUNC int count_digits<4>(detail::fallback_uintptr n) { template <typename T> const typename basic_data<T>::digit_pair basic_data<T>::digits[] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, - {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, - {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, - {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, - {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, - {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, - {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, - {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, - {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, - {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, - {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, 
{'6', '2'}, {'6', '3'}, {'6', '4'}, - {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, - {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, - {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, - {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, - {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, - {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, + {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, + {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, + {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, + {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, + {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, + {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, + {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, + {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, + {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, + {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, + {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, + {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, + {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, + {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, + {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, + {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; template <typename T> const char basic_data<T>::hex_digits[] = "0123456789abcdef"; @@ -279,16 +263,24 @@ const uint64_t basic_data<T>::powers_of_10_64[] = { template <typename T> const uint32_t basic_data<T>::zero_or_powers_of_10_32[] = {0, FMT_POWERS_OF_10(1)}; - template <typename T> const 
uint64_t basic_data<T>::zero_or_powers_of_10_64[] = { 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), 10000000000000000000ULL}; +template <typename T> +const uint32_t basic_data<T>::zero_or_powers_of_10_32_new[] = { + 0, 0, FMT_POWERS_OF_10(1)}; + +template <typename T> +const uint64_t basic_data<T>::zero_or_powers_of_10_64_new[] = { + 0, 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; + // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. // These are generated by support/compute-powers.py. template <typename T> -const uint64_t basic_data<T>::pow10_significands[] = { +const uint64_t basic_data<T>::grisu_pow10_significands[] = { 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, @@ -323,7 +315,7 @@ const uint64_t basic_data<T>::pow10_significands[] = { // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding // to significands above. 
template <typename T> -const int16_t basic_data<T>::pow10_exponents[] = { +const int16_t basic_data<T>::grisu_pow10_exponents[] = { -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, @@ -334,6 +326,744 @@ const int16_t basic_data<T>::pow10_exponents[] = { 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; template <typename T> +const divtest_table_entry<uint32_t> basic_data<T>::divtest_table_for_pow5_32[] = + {{0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333}, + {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba}, + {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5}, + {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf}, + {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897}, + {0x3ed61f49, 0x000001b7}}; + +template <typename T> +const divtest_table_entry<uint64_t> basic_data<T>::divtest_table_for_pow5_64[] = + {{0x0000000000000001, 0xffffffffffffffff}, + {0xcccccccccccccccd, 0x3333333333333333}, + {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70}, + {0x1cac083126e978d5, 0x020c49ba5e353f7c}, + {0xd288ce703afb7e91, 0x0068db8bac710cb2}, + {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0}, + {0x790fb65668c26139, 0x000431bde82d7b63}, + {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a}, + {0xc767074b22e90e21, 0x00002af31dc46118}, + {0x8e47ce423a2e9c6d, 0x0000089705f4136b}, + {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b}, + {0x0fee64690c913975, 0x00000057f5ff85e5}, + {0x3662e0e1cf503eb1, 0x000000119799812d}, + {0xa47a2cf9f6433fbd, 0x0000000384b84d09}, + {0x54186f653140a659, 0x00000000b424dc35}, + {0x7738164770402145, 0x0000000024075f3d}, + {0xe4a4d1417cd9a041, 0x000000000734aca5}, + {0xc75429d9e5c5200d, 0x000000000170ef54}, + {0xc1773b91fac10669, 0x000000000049c977}, + {0x26b172506559ce15, 0x00000000000ec1e4}, + {0xd489e3a9addec2d1, 0x000000000002f394}, + {0x90e860bb892c8d5d, 0x000000000000971d}, + {0x502e79bf1b6f4f79, 0x0000000000001e39}, + 
{0xdcd618596be30fe5, 0x000000000000060b}}; + +template <typename T> +const uint64_t basic_data<T>::dragonbox_pow10_significands_64[] = { + 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, + 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, + 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, + 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, + 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, + 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, + 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, + 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, + 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, + 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, + 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, + 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, + 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, + 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, + 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, + 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000, + 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, + 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, + 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, + 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984, + 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296, + 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6, + 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 0xf0bdc21abb48db20, + 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd, + 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719, + 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e}; + +template <typename T> +const uint128_wrapper basic_data<T>::dragonbox_pow10_significands_128[] = { +#if FMT_USE_FULL_CACHE_DRAGONBOX + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0x9faacf3df73609b1, 0x77b191618c54e9ad}, + {0xc795830d75038c1d, 
0xd59df5b9ef6a2418}, + {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, + {0x9becce62836ac577, 0x4ee367f9430aec33}, + {0xc2e801fb244576d5, 0x229c41f793cda740}, + {0xf3a20279ed56d48a, 0x6b43527578c11110}, + {0x9845418c345644d6, 0x830a13896b78aaaa}, + {0xbe5691ef416bd60c, 0x23cc986bc656d554}, + {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, + {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, + {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, + {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, + {0x91376c36d99995be, 0x23100809b9c21fa2}, + {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, + {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, + {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, + {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, + {0xdd95317f31c7fa1d, 0x40405643d711d584}, + {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, + {0xad1c8eab5ee43b66, 0xda3243650005eed0}, + {0xd863b256369d4a40, 0x90bed43e40076a83}, + {0x873e4f75e2224e68, 0x5a7744a6e804a292}, + {0xa90de3535aaae202, 0x711515d0a205cb37}, + {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, + {0x8412d9991ed58091, 0xe858790afe9486c3}, + {0xa5178fff668ae0b6, 0x626e974dbe39a873}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, + {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, + {0xc987434744ac874e, 0xa327ffb266b56221}, + {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, + {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, + {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, + {0xf6019da07f549b2b, 0x7e2a53a146606a49}, + {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, + {0xc0314325637a1939, 0xfa911155fefb5309}, + {0xf03d93eebc589f88, 0x793555ab7eba27cb}, + {0x96267c7535b763b5, 0x4bc1558b2f3458df}, + {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, + {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, + {0x92a1958a7675175f, 0x0bfacd89ec191eca}, + {0xb749faed14125d36, 0xcef980ec671f667c}, + {0xe51c79a85916f484, 0x82b7e12780e7401b}, + {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, + {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, + {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, + {0x8bd6a141006042bd, 
0xe0c8bb2c5c6d24e1}, + {0xaecc49914078536d, 0x58fae9f773886e19}, + {0xda7f5bf590966848, 0xaf39a475506a899f}, + {0x888f99797a5e012d, 0x6d8406c952429604}, + {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, + {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, + {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0xd0601d8efc57b08b, 0xf13b94daf124da27}, + {0x823c12795db6ce57, 0x76c53d08d6b70859}, + {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, + {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, + {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, + {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, + {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, + {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, + {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, + {0xc21094364dfb5636, 0x985915fc12f542e5}, + {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, + {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, + {0xbd8430bd08277231, 0x50c6ff782a838354}, + {0xece53cec4a314ebd, 0xa4f8bf5635246429}, + {0x940f4613ae5ed136, 0x871b7795e136be9a}, + {0xb913179899f68584, 0x28e2557b59846e40}, + {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, + {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, + {0xb4bca50b065abe63, 0x0fed077a756b53aa}, + {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, + {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, + {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, + {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, + {0x89e42caaf9491b60, 0xf41686c49db57245}, + {0xac5d37d5b79b6239, 0x311c2875c522ced6}, + {0xd77485cb25823ac7, 0x7d633293366b828c}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, + {0xd267caa862a12d66, 0xd072df63c324fd7c}, + {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, + {0xa46116538d0deb78, 0x52d9be85f074e609}, + {0xcd795be870516656, 0x67902e276c921f8c}, + {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, + {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, + {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, + {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, + {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, + {0xc3f490aa77bd60fc, 
0xbedbfc4411068a9d}, + {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, + {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, + {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, + {0xef340a98172aace4, 0x86fb897116c87c35}, + {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, + {0xbae0a846d2195712, 0x8974836059cca10a}, + {0xe998d258869facd7, 0x2bd1a438703fc94c}, + {0x91ff83775423cc06, 0x7b6306a34627ddd0}, + {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, + {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, + {0x8e938662882af53e, 0x547eb47b7282ee9d}, + {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, + {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, + {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, + {0xae0b158b4738705e, 0x9624ab50b148d446}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, + {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, + {0xd47487cc8470652b, 0x7647c32000696720}, + {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, + {0xa5fb0a17c777cf09, 0xf468107100525891}, + {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, + {0x81ac1fe293d599bf, 0xc6f14cd848405531}, + {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, + {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, + {0xfd442e4688bd304a, 0x908f4a166d1da664}, + {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, + {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, + {0xf7549530e188c128, 0xd12bee59e68ef47d}, + {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, + {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, + {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, + {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, + {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, + {0xebdf661791d60f56, 0x111b495b3464ad22}, + {0x936b9fcebb25c995, 0xcab10dd900beec35}, + {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, + {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, + {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, + {0xb3f4e093db73a093, 0x59ed216765690f57}, + {0xe0f218b8d25088b8, 0x306869c13ec3532d}, + {0x8c974f7383725573, 0x1e414218c73a13fc}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, + {0x894bc396ce5da772, 
0x6b8bba8c328eb784}, + {0xab9eb47c81f5114f, 0x066ea92f3f326565}, + {0xd686619ba27255a2, 0xc80a537b0efefebe}, + {0x8613fd0145877585, 0xbd06742ce95f5f37}, + {0xa798fc4196e952e7, 0x2c48113823b73705}, + {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, + {0x82ef85133de648c4, 0x9a984d73dbe722fc}, + {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, + {0xcc963fee10b7d1b3, 0x318df905079926a9}, + {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, + {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, + {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, + {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, + {0x9c1661a651213e2d, 0x06bea10ca65c084f}, + {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, + {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, + {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, + {0xbe89523386091465, 0xf6bbb397f1135824}, + {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, + {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, + {0xba121a4650e4ddeb, 0x92f34d62616ce414}, + {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, + {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, + {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, + {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, + {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, + {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, + {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, + {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, + {0x87625f056c7c4a8b, 0x11471cd764ad4973}, + {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, + {0xd389b47879823479, 0x4aff1d108d4ec2c4}, + {0x843610cb4bf160cb, 0xcedf722a585139bb}, + {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, + {0xce947a3da6a9273e, 0x733d226229feea33}, + {0x811ccc668829b887, 0x0806357d5a3f5260}, + {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, + {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, + {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, + {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, + {0xc5029163f384a931, 0x0a9e795e65d4df12}, + {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, + {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, + {0xc06481fb9bcf8d39, 
0xe45ec2862f71e1d7}, + {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, + {0x964e858c91ba2655, 0x3a6a07f8d510f870}, + {0xbbe226efb628afea, 0x890489f70a55368c}, + {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, + {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, + {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, + {0xb32df8e9f3546564, 0x47939822dc96abfa}, + {0xdff9772470297ebd, 0x59787e2b93bc56f8}, + {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, + {0xaefae51477a06b03, 0xede622920b6b23f2}, + {0xdab99e59958885c4, 0xe95fab368e45ecee}, + {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, + {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, + {0xd59944a37c0752a2, 0x4be76d3346f04960}, + {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, + {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, + {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, + {0x825ecc24c873782f, 0x8ed400668c0c28c9}, + {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, + {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, + {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, + {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, + {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, + {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, + {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, + {0xc24452da229b021b, 0xfbe85badce996169}, + {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, + {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, + {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, + {0xed246723473e3813, 0x290123e9aab23b69}, + {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, + {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, + {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, + {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, + {0x8d590723948a535f, 0x579c487e5a38ad0f}, + {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, + {0xdcdb1b2798182244, 0xf8e431456cf88e66}, + {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, + {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, + {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, + {0x86ccbb52ea94baea, 
0x98e947129fc2b4ea}, + {0xa87fea27a539e9a5, 0x3f2398d747b36225}, + {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, + {0x83a3eeeef9153e89, 0x1953cf68300424ad}, + {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, + {0xcdb02555653131b6, 0x3792f412cb06794e}, + {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, + {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, + {0xc8de047564d20a8b, 0xf245825a5a445276}, + {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, + {0x9ced737bb6c4183d, 0x55464dd69685606c}, + {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, + {0xf53304714d9265df, 0xd53dd99f4b3066a9}, + {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, + {0xbf8fdb78849a5f96, 0xde98520472bdd034}, + {0xef73d256a5c0f77c, 0x963e66858f6d4441}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xbb127c53b17ec159, 0x5560c018580d5d53}, + {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, + {0x9226712162ab070d, 0xcab3961304ca70e9}, + {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, + {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, + {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, + {0xb267ed1940f1c61c, 0x55f038b237591ed4}, + {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, + {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, + {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, + {0xd9c7dced53c72255, 0x96e7bd358c904a22}, + {0x881cea14545c7575, 0x7e50d64177da2e55}, + {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, + {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, + {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, + {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, + {0xcfb11ead453994ba, 0x67de18eda5814af3}, + {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, + {0xa2425ff75e14fc31, 0xa1258379a94d028e}, + {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, + {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, + {0x9e74d1b791e07e48, 0x775ea264cf55347e}, + {0xc612062576589dda, 0x95364afe032a819e}, + {0xf79687aed3eec551, 0x3a83ddbd83f52205}, + {0x9abe14cd44753b52, 0xc4926a9672793543}, + {0xc16d9a0095928a27, 0x75b7053c0f178294}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, + {0xbce5086492111aea, 
0x88f4bb1ca6bcf585}, + {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, + {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, + {0xb877aa3236a4b449, 0x09befeb9fad487c3}, + {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, + {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, + {0xb424dc35095cd80f, 0x538484c19ef38c95}, + {0xe12e13424bb40e13, 0x2865a5f206b06fba}, + {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, + {0xafebff0bcb24aafe, 0xf78f69a51539d749}, + {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, + {0x89705f4136b4a597, 0x31680a88f8953031}, + {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, + {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, + {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, + {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, + {0xd1b71758e219652b, 0xd3c36113404ea4a9}, + {0x83126e978d4fdf3b, 0x645a1cac083126ea}, + {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, + {0xcccccccccccccccc, 0xcccccccccccccccd}, + {0x8000000000000000, 0x0000000000000000}, + {0xa000000000000000, 0x0000000000000000}, + {0xc800000000000000, 0x0000000000000000}, + {0xfa00000000000000, 0x0000000000000000}, + {0x9c40000000000000, 0x0000000000000000}, + {0xc350000000000000, 0x0000000000000000}, + {0xf424000000000000, 0x0000000000000000}, + {0x9896800000000000, 0x0000000000000000}, + {0xbebc200000000000, 0x0000000000000000}, + {0xee6b280000000000, 0x0000000000000000}, + {0x9502f90000000000, 0x0000000000000000}, + {0xba43b74000000000, 0x0000000000000000}, + {0xe8d4a51000000000, 0x0000000000000000}, + {0x9184e72a00000000, 0x0000000000000000}, + {0xb5e620f480000000, 0x0000000000000000}, + {0xe35fa931a0000000, 0x0000000000000000}, + {0x8e1bc9bf04000000, 0x0000000000000000}, + {0xb1a2bc2ec5000000, 0x0000000000000000}, + {0xde0b6b3a76400000, 0x0000000000000000}, + {0x8ac7230489e80000, 0x0000000000000000}, + {0xad78ebc5ac620000, 0x0000000000000000}, + {0xd8d726b7177a8000, 0x0000000000000000}, + {0x878678326eac9000, 0x0000000000000000}, + {0xa968163f0a57b400, 0x0000000000000000}, + {0xd3c21bcecceda100, 0x0000000000000000}, + {0x84595161401484a0, 
0x0000000000000000}, + {0xa56fa5b99019a5c8, 0x0000000000000000}, + {0xcecb8f27f4200f3a, 0x0000000000000000}, + {0x813f3978f8940984, 0x4000000000000000}, + {0xa18f07d736b90be5, 0x5000000000000000}, + {0xc9f2c9cd04674ede, 0xa400000000000000}, + {0xfc6f7c4045812296, 0x4d00000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xc5371912364ce305, 0x6c28000000000000}, + {0xf684df56c3e01bc6, 0xc732000000000000}, + {0x9a130b963a6c115c, 0x3c7f400000000000}, + {0xc097ce7bc90715b3, 0x4b9f100000000000}, + {0xf0bdc21abb48db20, 0x1e86d40000000000}, + {0x96769950b50d88f4, 0x1314448000000000}, + {0xbc143fa4e250eb31, 0x17d955a000000000}, + {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, + {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, + {0xb7abc627050305ad, 0xf14a3d9e40000000}, + {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, + {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, + {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, + {0xe0352f62a19e306e, 0xd50b2037ad200000}, + {0x8c213d9da502de45, 0x4526f422cc340000}, + {0xaf298d050e4395d6, 0x9670b12b7f410000}, + {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, + {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, + {0xab0e93b6efee0053, 0x8eea0d047a457a00}, + {0xd5d238a4abe98068, 0x72a4904598d6d880}, + {0x85a36366eb71f041, 0x47a6da2b7f864750}, + {0xa70c3c40a64e6c51, 0x999090b65f67d924}, + {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, + {0x82818f1281ed449f, 0xbff8f10e7a8921a4}, + {0xa321f2d7226895c7, 0xaff72d52192b6a0d}, + {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490}, + {0xfee50b7025c36a08, 0x02f236d04753d5b4}, + {0x9f4f2726179a2245, 0x01d762422c946590}, + {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5}, + {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2}, + {0x9b934c3b330c8577, 0x63cc55f49f88eb2f}, + {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb}, + {0xf316271c7fc3908a, 0x8bef464e3945ef7a}, + {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac}, + {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317}, + {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd}, + {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a}, + {0xb975d6b6ee39e436, 
0xb3e2fd538e122b44}, + {0xe7d34c64a9c85d44, 0x60dbbca87196b616}, + {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd}, + {0xb51d13aea4a488dd, 0x6babab6398bdbe41}, + {0xe264589a4dcdab14, 0xc696963c7eed2dd1}, + {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2}, + {0xb0de65388cc8ada8, 0x3b25a55f43294bcb}, + {0xdd15fe86affad912, 0x49ef0eb713f39ebe}, + {0x8a2dbf142dfcc7ab, 0x6e3569326c784337}, + {0xacb92ed9397bf996, 0x49c2c37f07965404}, + {0xd7e77a8f87daf7fb, 0xdc33745ec97be906}, + {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3}, + {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c}, + {0xd2d80db02aabd62b, 0xf50a3fa490c30190}, + {0x83c7088e1aab65db, 0x792667c6da79e0fa}, + {0xa4b8cab1a1563f52, 0x577001b891185938}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, + {0x80b05e5ac60b6178, 0x544f8158315b05b4}, + {0xa0dc75f1778e39d6, 0x696361ae3db1c721}, + {0xc913936dd571c84c, 0x03bc3a19cd1e38e9}, + {0xfb5878494ace3a5f, 0x04ab48a04065c723}, + {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76}, + {0xc45d1df942711d9a, 0x3ba5d0bd324f8394}, + {0xf5746577930d6500, 0xca8f44ec7ee36479}, + {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb}, + {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e}, + {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e}, + {0x95d04aee3b80ece5, 0xbba1f1d158724a12}, + {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97}, + {0xea1575143cf97226, 0xf52d09d71a3293bd}, + {0x924d692ca61be758, 0x593c2626705f9c56}, + {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c}, + {0xe498f455c38b997a, 0x0b6dfb9c0f956447}, + {0x8edf98b59a373fec, 0x4724bd4189bd5eac}, + {0xb2977ee300c50fe7, 0x58edec91ec2cb657}, + {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed}, + {0x8b865b215899f46c, 0xbd79e0d20082ee74}, + {0xae67f1e9aec07187, 0xecd8590680a3aa11}, + {0xda01ee641a708de9, 0xe80e6f4820cc9495}, + {0x884134fe908658b2, 0x3109058d147fdcdd}, + {0xaa51823e34a7eede, 0xbd4b46f0599fd415}, + {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a}, + {0x850fadc09923329e, 0x03e2cf6bc604ddb0}, + {0xa6539930bf6bff45, 0x84db8346b786151c}, + {0xcfe87f7cef46ff16, 0xe612641865679a63}, + {0x81f14fae158c5f6e, 
0x4fcb7e8f3f60c07e}, + {0xa26da3999aef7749, 0xe3be5e330f38f09d}, + {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5}, + {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6}, + {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa}, + {0xc646d63501a1511d, 0xb281e1fd541501b8}, + {0xf7d88bc24209a565, 0x1f225a7ca91a4226}, + {0x9ae757596946075f, 0x3375788de9b06958}, + {0xc1a12d2fc3978937, 0x0052d6b1641c83ae}, + {0xf209787bb47d6b84, 0xc0678c5dbd23a49a}, + {0x9745eb4d50ce6332, 0xf840b7ba963646e0}, + {0xbd176620a501fbff, 0xb650e5a93bc3d898}, + {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe}, + {0x93ba47c980e98cdf, 0xc66f336c36b10137}, + {0xb8a8d9bbe123f017, 0xb80b0047445d4184}, + {0xe6d3102ad96cec1d, 0xa60dc059157491e5}, + {0x9043ea1ac7e41392, 0x87c89837ad68db2f}, + {0xb454e4a179dd1877, 0x29babe4598c311fb}, + {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a}, + {0x8ce2529e2734bb1d, 0x1899e4a65f58660c}, + {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f}, + {0xdc21a1171d42645d, 0x76707543f4fa1f73}, + {0x899504ae72497eba, 0x6a06494a791c53a8}, + {0xabfa45da0edbde69, 0x0487db9d17636892}, + {0xd6f8d7509292d603, 0x45a9d2845d3c42b6}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, + {0xa7f26836f282b732, 0x8e6cac7768d7141e}, + {0xd1ef0244af2364ff, 0x3207d795430cd926}, + {0x8335616aed761f1f, 0x7f44e6bd49e807b8}, + {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6}, + {0xcd036837130890a1, 0x36dba887c37a8c0f}, + {0x802221226be55a64, 0xc2494954da2c9789}, + {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c}, + {0xc83553c5c8965d3d, 0x6f92829494e5acc7}, + {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9}, + {0x9c69a97284b578d7, 0xff2a760414536efb}, + {0xc38413cf25e2d70d, 0xfef5138519684aba}, + {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69}, + {0x98bf2f79d5993802, 0xef2f773ffbd97a61}, + {0xbeeefb584aff8603, 0xaafb550ffacfd8fa}, + {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38}, + {0x952ab45cfa97a0b2, 0xdd945a747bf26183}, + {0xba756174393d88df, 0x94f971119aeef9e4}, + {0xe912b9d1478ceb17, 0x7a37cd5601aab85d}, + {0x91abb422ccb812ee, 0xac62e055c10ab33a}, + {0xb616a12b7fe617aa, 
0x577b986b314d6009}, + {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b}, + {0x8e41ade9fbebc27d, 0x14588f13be847307}, + {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8}, + {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb}, + {0x8aec23d680043bee, 0x25de7bb9480d5854}, + {0xada72ccc20054ae9, 0xaf561aa79a10ae6a}, + {0xd910f7ff28069da4, 0x1b2ba1518094da04}, + {0x87aa9aff79042286, 0x90fb44d2f05d0842}, + {0xa99541bf57452b28, 0x353a1607ac744a53}, + {0xd3fa922f2d1675f2, 0x42889b8997915ce8}, + {0x847c9b5d7c2e09b7, 0x69956135febada11}, + {0xa59bc234db398c25, 0x43fab9837e699095}, + {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb}, + {0x8161afb94b44f57d, 0x1d1be0eebac278f5}, + {0xa1ba1ba79e1632dc, 0x6462d92a69731732}, + {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe}, + {0xfcb2cb35e702af78, 0x5cda735244c3d43e}, + {0x9defbf01b061adab, 0x3a0888136afa64a7}, + {0xc56baec21c7a1916, 0x088aaa1845b8fdd0}, + {0xf6c69a72a3989f5b, 0x8aad549e57273d45}, + {0x9a3c2087a63f6399, 0x36ac54e2f678864b}, + {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd}, + {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5}, + {0x969eb7c47859e743, 0x9f644ae5a4b1b325}, + {0xbc4665b596706114, 0x873d5d9f0dde1fee}, + {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea}, + {0x9316ff75dd87cbd8, 0x09a7f12442d588f2}, + {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f}, + {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa}, + {0x8fa475791a569d10, 0xf96e017d694487bc}, + {0xb38d92d760ec4455, 0x37c981dcc395a9ac}, + {0xe070f78d3927556a, 0x85bbe253f47b1417}, + {0x8c469ab843b89562, 0x93956d7478ccec8e}, + {0xaf58416654a6babb, 0x387ac8d1970027b2}, + {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e}, + {0x88fcf317f22241e2, 0x441fece3bdf81f03}, + {0xab3c2fddeeaad25a, 0xd527e81cad7626c3}, + {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074}, + {0x85c7056562757456, 0xf6872d5667844e49}, + {0xa738c6bebb12d16c, 0xb428f8ac016561db}, + {0xd106f86e69d785c7, 0xe13336d701beba52}, + {0x82a45b450226b39c, 0xecc0024661173473}, + {0xa34d721642b06084, 0x27f002d7f95d0190}, + {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4}, + {0xff290242c83396ce, 
0x7e67047175a15271}, + {0x9f79a169bd203e41, 0x0f0062c6e984d386}, + {0xc75809c42c684dd1, 0x52c07b78a3e60868}, + {0xf92e0c3537826145, 0xa7709a56ccdf8a82}, + {0x9bbcc7a142b17ccb, 0x88a66076400bb691}, + {0xc2abf989935ddbfe, 0x6acff893d00ea435}, + {0xf356f7ebf83552fe, 0x0583f6b8c4124d43}, + {0x98165af37b2153de, 0xc3727a337a8b704a}, + {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c}, + {0xeda2ee1c7064130c, 0x1162def06f79df73}, + {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8}, + {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692}, + {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437}, + {0x910ab1d4db9914a0, 0x1d9c9892400a22a2}, + {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b}, + {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, + {0xb10d8e1456105dad, 0x7425a83e872c5f47}, + {0xdd50f1996b947518, 0xd12f124e28f77719}, + {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f}, + {0xace73cbfdc0bfb7b, 0x636cc64d1001550b}, + {0xd8210befd30efa5a, 0x3c47f7e05401aa4e}, + {0x8714a775e3e95c78, 0x65acfaec34810a71}, + {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d}, + {0xd31045a8341ca07c, 0x1ede48111209a050}, + {0x83ea2b892091e44d, 0x934aed0aab460432}, + {0xa4e4b66b68b65d60, 0xf81da84d5617853f}, + {0xce1de40642e3f4b9, 0x36251260ab9d668e}, + {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019}, + {0xa1075a24e4421730, 0xb24cf65b8612f81f}, + {0xc94930ae1d529cfc, 0xdee033f26797b627}, + {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1}, + {0x9d412e0806e88aa5, 0x8e1f289560ee864e}, + {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2}, + {0xf5b5d7ec8acb58a2, 0xae10af696774b1db}, + {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29}, + {0xbff610b0cc6edd3f, 0x17fd090a58d32af3}, + {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0}, + {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e}, + {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1}, + {0xea53df5fd18d5513, 0x84c86189216dc5ed}, + {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4}, + {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, + {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e}, + {0xb2c71d5bca9023f8, 
0x743e20e9ef511012}, + {0xdf78e4b2bd342cf6, 0x914da9246b255416}, + {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e}, + {0xae9672aba3d0c320, 0xa184ac2473b529b1}, + {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e}, + {0x8865899617fb1871, 0x7e2fa67c7a658892}, + {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7}, + {0xd51ea6fa85785631, 0x552a74227f3ea565}, + {0x8533285c936b35de, 0xd53a88958f87275f}, + {0xa67ff273b8460356, 0x8a892abaf368f137}, + {0xd01fef10a657842c, 0x2d2b7569b0432d85}, + {0x8213f56a67f6b29b, 0x9c3b29620e29fc73}, + {0xa298f2c501f45f42, 0x8349f3ba91b47b8f}, + {0xcb3f2f7642717713, 0x241c70a936219a73}, + {0xfe0efb53d30dd4d7, 0xed238cd383aa0110}, + {0x9ec95d1463e8a506, 0xf4363804324a40aa}, + {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5}, + {0xf81aa16fdc1b81da, 0xdd94b7868e94050a}, + {0x9b10a4e5e9913128, 0xca7cf2b4191c8326}, + {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0}, + {0xf24a01a73cf2dccf, 0xbc633b39673c8cec}, + {0x976e41088617ca01, 0xd5be0503e085d813}, + {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18}, + {0xec9c459d51852ba2, 0xddf8e7d60ed1219e}, + {0x93e1ab8252f33b45, 0xcabb90e5c942b503}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, + {0xe7109bfba19c0c9d, 0x0cc512670a783ad4}, + {0x906a617d450187e2, 0x27fb2b80668b24c5}, + {0xb484f9dc9641e9da, 0xb1f9f660802dedf6}, + {0xe1a63853bbd26451, 0x5e7873f8a0396973}, + {0x8d07e33455637eb2, 0xdb0b487b6423e1e8}, + {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62}, + {0xdc5c5301c56b75f7, 0x7641a140cc7810fb}, + {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d}, + {0xac2820d9623bf429, 0x546345fa9fbdcd44}, + {0xd732290fbacaf133, 0xa97c177947ad4095}, + {0x867f59a9d4bed6c0, 0x49ed8eabcccc485d}, + {0xa81f301449ee8c70, 0x5c68f256bfff5a74}, + {0xd226fc195c6a2f8c, 0x73832eec6fff3111}, + {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab}, + {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55}, + {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb}, + {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3}, + {0xa0555e361951c366, 0xd7e105bcc332621f}, + {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7}, + {0xfa856334878fc150, 
0xb14f98f6f0feb951}, + {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3}, + {0xc3b8358109e84f07, 0x0a862f80ec4700c8}, + {0xf4a642e14c6262c8, 0xcd27bb612758c0fa}, + {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c}, + {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3}, + {0xeeea5d5004981478, 0x1858ccfce06cac74}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}, + {0xbaa718e68396cffd, 0xd30560258f54e6ba}, + {0xe950df20247c83fd, 0x47c6b82ef32a2069}, + {0x91d28b7416cdd27e, 0x4cdc331d57fa5441}, + {0xb6472e511c81471d, 0xe0133fe4adf8e952}, + {0xe3d8f9e563a198e5, 0x58180fddd97723a6}, + {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648}, + {0xb201833b35d63f73, 0x2cd2cc6551e513da}, + {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1}, + {0x8b112e86420f6191, 0xfb04afaf27faf782}, + {0xadd57a27d29339f6, 0x79c5db9af1f9b563}, + {0xd94ad8b1c7380874, 0x18375281ae7822bc}, + {0x87cec76f1c830548, 0x8f2293910d0b15b5}, + {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22}, + {0xd433179d9c8cb841, 0x5fa60692a46151eb}, + {0x849feec281d7f328, 0xdbc7c41ba6bcd333}, + {0xa5c7ea73224deff3, 0x12b9b522906c0800}, + {0xcf39e50feae16bef, 0xd768226b34870a00}, + {0x81842f29f2cce375, 0xe6a1158300d46640}, + {0xa1e53af46f801c53, 0x60495ae3c1097fd0}, + {0xca5e89b18b602368, 0x385bb19cb14bdfc4}, + {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5}, + {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1}, + {0xc5a05277621be293, 0xc7098b7305241885}, + {0xf70867153aa2db38, 0xb8cbee4fc66d1ea7} +#else + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0xc350000000000000, 0x0000000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xfee50b7025c36a08, 
0x02f236d04753d5b4}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, + {0xa6539930bf6bff45, 0x84db8346b786151c}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, + {0xd910f7ff28069da4, 0x1b2ba1518094da04}, + {0xaf58416654a6babb, 0x387ac8d1970027b2}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc8} +#endif +}; + +#if !FMT_USE_FULL_CACHE_DRAGONBOX +template <typename T> +const uint64_t basic_data<T>::powers_of_5_64[] = { + 0x0000000000000001, 0x0000000000000005, 0x0000000000000019, + 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35, + 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1, + 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd, + 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9, + 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5, + 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631, + 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed, + 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9}; + +template <typename T> +const uint32_t basic_data<T>::dragonbox_pow10_recovery_errors[] = { + 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001, + 0x50000000, 0x00104000, 0x54010004, 0x05004001, 0x55555544, 0x41545555, + 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110, + 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454, + 0x00400541, 0x00000000, 0x55555450, 0x10056551, 0x10054011, 0x55551014, + 0x69514555, 0x05151109, 0x00155555}; +#endif + +template <typename T> const char basic_data<T>::foreground_color[] = "\x1b[38;2;"; template <typename T> const char basic_data<T>::background_color[] = "\x1b[48;2;"; @@ -366,6 +1096,10 @@ class fp { private: using significand_type = uint64_t; + template <typename Float> + using is_supported_float = 
bool_constant<sizeof(Float) == sizeof(uint64_t) || + sizeof(Float) == sizeof(uint32_t)>; + public: significand_type f; int e; @@ -388,63 +1122,38 @@ class fp { template <typename Double> explicit fp(Double d) { assign(d); } // Assigns d to this and return true iff predecessor is closer than successor. - template <typename Double, FMT_ENABLE_IF(sizeof(Double) == sizeof(uint64_t))> - bool assign(Double d) { - // Assume double is in the format [sign][exponent][significand]. - using limits = std::numeric_limits<Double>; + template <typename Float, FMT_ENABLE_IF(is_supported_float<Float>::value)> + bool assign(Float d) { + // Assume float is in the format [sign][exponent][significand]. + using limits = std::numeric_limits<Float>; + const int float_significand_size = limits::digits - 1; const int exponent_size = - bits<Double>::value - double_significand_size - 1; // -1 for sign - const uint64_t significand_mask = implicit_bit - 1; + bits<Float>::value - float_significand_size - 1; // -1 for sign + const uint64_t float_implicit_bit = 1ULL << float_significand_size; + const uint64_t significand_mask = float_implicit_bit - 1; const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask; const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1; - auto u = bit_cast<uint64_t>(d); + constexpr bool is_double = sizeof(Float) == sizeof(uint64_t); + auto u = bit_cast<conditional_t<is_double, uint64_t, uint32_t>>(d); f = u & significand_mask; int biased_e = - static_cast<int>((u & exponent_mask) >> double_significand_size); + static_cast<int>((u & exponent_mask) >> float_significand_size); // Predecessor is closer if d is a normalized power of 2 (f == 0) other than // the smallest normalized number (biased_e > 1). bool is_predecessor_closer = f == 0 && biased_e > 1; if (biased_e != 0) - f += implicit_bit; + f += float_implicit_bit; else biased_e = 1; // Subnormals use biased exponent 1 (min exponent). 
- e = biased_e - exponent_bias - double_significand_size; + e = biased_e - exponent_bias - float_significand_size; return is_predecessor_closer; } - template <typename Double, FMT_ENABLE_IF(sizeof(Double) != sizeof(uint64_t))> - bool assign(Double) { + template <typename Float, FMT_ENABLE_IF(!is_supported_float<Float>::value)> + bool assign(Float) { *this = fp(); return false; } - - // Assigns d to this together with computing lower and upper boundaries, - // where a boundary is a value half way between the number and its predecessor - // (lower) or successor (upper). The upper boundary is normalized and lower - // has the same exponent but may be not normalized. - template <typename Double> boundaries assign_with_boundaries(Double d) { - bool is_lower_closer = assign(d); - fp lower = - is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1); - // 1 in normalize accounts for the exponent shift above. - fp upper = normalize<1>(fp((f << 1) + 1, e - 1)); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } - - template <typename Double> boundaries assign_float_with_boundaries(Double d) { - assign(d); - constexpr int min_normal_e = std::numeric_limits<float>::min_exponent - - std::numeric_limits<double>::digits; - significand_type half_ulp = 1 << (std::numeric_limits<double>::digits - - std::numeric_limits<float>::digits - 1); - if (min_normal_e > e) half_ulp <<= min_normal_e - e; - fp upper = normalize<0>(fp(f + half_ulp, e)); - fp lower = fp( - f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } }; // Normalizes the value converted from double and multiplied by (1 << SHIFT). @@ -488,11 +1197,12 @@ inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; } // Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its // (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. 
inline fp get_cached_power(int min_exponent, int& pow10_exponent) { - const int64_t one_over_log2_10 = 0x4d104d42; // round(pow(2, 32) / log2(10)) + const int shift = 32; + const auto significand = static_cast<int64_t>(data::log10_2_significand); int index = static_cast<int>( - ((min_exponent + fp::significand_size - 1) * one_over_log2_10 + - ((int64_t(1) << 32) - 1)) // ceil - >> 32 // arithmetic shift + ((min_exponent + fp::significand_size - 1) * (significand >> shift) + + ((int64_t(1) << shift) - 1)) // ceil + >> 32 // arithmetic shift ); // Decimal exponent of the first (smallest) cached power of 10. const int first_dec_exp = -348; @@ -500,7 +1210,8 @@ inline fp get_cached_power(int min_exponent, int& pow10_exponent) { const int dec_exp_step = 8; index = (index - first_dec_exp - 1) / dec_exp_step + 1; pow10_exponent = first_dec_exp + index * dec_exp_step; - return {data::pow10_significands[index], data::pow10_exponents[index]}; + return {data::grisu_pow10_significands[index], + data::grisu_pow10_exponents[index]}; } // A simple accumulator to hold the sums of terms in bigint::square if uint128_t @@ -559,9 +1270,8 @@ class bigint { FMT_ASSERT(compare(*this, other) >= 0, ""); bigit borrow = 0; int i = other.exp_ - exp_; - for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) { + for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) subtract_bigits(i, other.bigits_[j], borrow); - } while (borrow > 0) subtract_bigits(i, 0, borrow); remove_leading_zeros(); } @@ -733,22 +1443,26 @@ class bigint { exp_ *= 2; } + // If this bigint has a bigger exponent than other, adds trailing zero to make + // exponents equal. This simplifies some operations such as subtraction. 
+ void align(const bigint& other) { + int exp_difference = exp_ - other.exp_; + if (exp_difference <= 0) return; + int num_bigits = static_cast<int>(bigits_.size()); + bigits_.resize(to_unsigned(num_bigits + exp_difference)); + for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) + bigits_[j] = bigits_[i]; + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + exp_ -= exp_difference; + } + // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. int divmod_assign(const bigint& divisor) { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; - int num_bigits = static_cast<int>(bigits_.size()); FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); - int exp_difference = exp_ - divisor.exp_; - if (exp_difference > 0) { - // Align bigints by adding trailing zeros to simplify subtraction. - bigits_.resize(to_unsigned(num_bigits + exp_difference)); - for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) - bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); - exp_ -= exp_difference; - } + align(divisor); int quotient = 0; do { subtract_aligned(divisor); @@ -788,20 +1502,6 @@ enum result { }; } -// A version of count_digits optimized for grisu_gen_digits. -inline int grisu_count_digits(uint32_t n) { - if (n < 10) return 1; - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; -} - // Generates output using the Grisu digit-gen algorithm. // error: the size of the region (lower, upper) outside of which numbers // definitely do not round to value (Delta in Grisu3). 
@@ -817,7 +1517,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, FMT_ASSERT(integral == value.f >> -one.e, ""); // The fractional part of scaled value (p2 in Grisu) c = value % one. uint64_t fractional = value.f & (one.f - 1); - exp = grisu_count_digits(integral); // kappa in Grisu. + exp = count_digits(integral); // kappa in Grisu. // Divide by 10 to prevent overflow. auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e, value.f / 10, error * 10, exp); @@ -867,8 +1567,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, FMT_ASSERT(false, "invalid number of digits"); } --exp; - uint64_t remainder = - (static_cast<uint64_t>(integral) << -one.e) + fractional; + auto remainder = (static_cast<uint64_t>(integral) << -one.e) + fractional; result = handler.on_digit(static_cast<char>('0' + digit), data::powers_of_10_64[exp] << -one.e, remainder, error, exp, true); @@ -878,8 +1577,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, for (;;) { fractional *= 10; error *= 10; - char digit = - static_cast<char>('0' + static_cast<char>(fractional >> -one.e)); + char digit = static_cast<char>('0' + (fractional >> -one.e)); fractional &= one.f - 1; --exp; result = handler.on_digit(digit, one.f, fractional, error, exp, false); @@ -916,6 +1614,7 @@ struct fixed_handler { uint64_t error, int, bool integral) { FMT_ASSERT(remainder < divisor, ""); buf[size++] = digit; + if (!integral && error >= remainder) return digits::error; if (size < precision) return digits::more; if (!integral) { // Check if error * 2 < divisor with overflow prevention. @@ -935,59 +1634,684 @@ struct fixed_handler { } if (buf[0] > '9') { buf[0] = '1'; - buf[size++] = '0'; + if (fixed) + buf[size++] = '0'; + else + ++exp10; } return digits::done; } }; -// The shortest representation digit handler. 
-struct grisu_shortest_handler { - char* buf; - int size; - // Distance between scaled value and upper bound (wp_W in Grisu3). - uint64_t diff; +// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. +namespace dragonbox { +// Computes 128-bit result of multiplication of two 64-bit unsigned integers. +FMT_SAFEBUFFERS inline uint128_wrapper umul128(uint64_t x, + uint64_t y) FMT_NOEXCEPT { +#if FMT_USE_INT128 + return static_cast<uint128_t>(x) * static_cast<uint128_t>(y); +#elif defined(_MSC_VER) && defined(_M_X64) + uint128_wrapper result; + result.low_ = _umul128(x, y, &result.high_); + return result; +#else + const uint64_t mask = (uint64_t(1) << 32) - uint64_t(1); + + uint64_t a = x >> 32; + uint64_t b = x & mask; + uint64_t c = y >> 32; + uint64_t d = y & mask; + + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + + uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + (bd & mask)}; +#endif +} + +// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. +FMT_SAFEBUFFERS inline uint64_t umul128_upper64(uint64_t x, + uint64_t y) FMT_NOEXCEPT { +#if FMT_USE_INT128 + auto p = static_cast<uint128_t>(x) * static_cast<uint128_t>(y); + return static_cast<uint64_t>(p >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + return umul128(x, y).high(); +#endif +} + +// Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +FMT_SAFEBUFFERS inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y) + FMT_NOEXCEPT { + uint128_wrapper g0 = umul128(x, y.high()); + g0 += umul128_upper64(x, y.low()); + return g0.high(); +} + +// Computes upper 32 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. 
+inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT { + return static_cast<uint32_t>(umul128_upper64(x, y)); +} + +// Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +FMT_SAFEBUFFERS inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y) + FMT_NOEXCEPT { + uint64_t g01 = x * y.high(); + uint64_t g10 = umul128_upper64(x, y.low()); + return g01 + g10; +} + +// Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. +inline uint64_t umul96_lower64(uint32_t x, uint64_t y) FMT_NOEXCEPT { + return x * y; +} + +// Computes floor(log10(pow(2, e))) for e in [-1700, 1700] using the method from +// https://fmt.dev/papers/Grisu-Exact.pdf#page=5, section 3.4. +inline int floor_log10_pow2(int e) FMT_NOEXCEPT { + FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent"); + const int shift = 22; + return (e * static_cast<int>(data::log10_2_significand >> (64 - shift))) >> + shift; +} + +// Various fast log computations. +inline int floor_log2_pow10(int e) FMT_NOEXCEPT { + FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); + const uint64_t log2_10_integer_part = 3; + const uint64_t log2_10_fractional_digits = 0x5269e12f346e2bf9; + const int shift_amount = 19; + return (e * static_cast<int>( + (log2_10_integer_part << shift_amount) | + (log2_10_fractional_digits >> (64 - shift_amount)))) >> + shift_amount; +} +inline int floor_log10_pow2_minus_log10_4_over_3(int e) FMT_NOEXCEPT { + FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent"); + const uint64_t log10_4_over_3_fractional_digits = 0x1ffbfc2bbc780375; + const int shift_amount = 22; + return (e * static_cast<int>(data::log10_2_significand >> + (64 - shift_amount)) - + static_cast<int>(log10_4_over_3_fractional_digits >> + (64 - shift_amount))) >> + shift_amount; +} + +// Returns true iff x is divisible by pow(2, exp). 
+inline bool divisible_by_power_of_2(uint32_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp >= 1, ""); + FMT_ASSERT(x != 0, ""); +#ifdef FMT_BUILTIN_CTZ + return FMT_BUILTIN_CTZ(x) >= exp; +#else + return exp < num_bits<uint32_t>() && x == ((x >> exp) << exp); +#endif +} +inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp >= 1, ""); + FMT_ASSERT(x != 0, ""); +#ifdef FMT_BUILTIN_CTZLL + return FMT_BUILTIN_CTZLL(x) >= exp; +#else + return exp < num_bits<uint64_t>() && x == ((x >> exp) << exp); +#endif +} + +// Returns true iff x is divisible by pow(5, exp). +inline bool divisible_by_power_of_5(uint32_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp <= 10, "too large exponent"); + return x * data::divtest_table_for_pow5_32[exp].mod_inv <= + data::divtest_table_for_pow5_32[exp].max_quotient; +} +inline bool divisible_by_power_of_5(uint64_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp <= 23, "too large exponent"); + return x * data::divtest_table_for_pow5_64[exp].mod_inv <= + data::divtest_table_for_pow5_64[exp].max_quotient; +} + +// Replaces n by floor(n / pow(5, N)) returning true if and only if n is +// divisible by pow(5, N). +// Precondition: n <= 2 * pow(5, N + 1). +template <int N> +bool check_divisibility_and_divide_by_pow5(uint32_t& n) FMT_NOEXCEPT { + static constexpr struct { + uint32_t magic_number; + int bits_for_comparison; + uint32_t threshold; + int shift_amount; + } infos[] = {{0xcccd, 16, 0x3333, 18}, {0xa429, 8, 0x0a, 20}}; + constexpr auto info = infos[N - 1]; + n *= info.magic_number; + const uint32_t comparison_mask = (1u << info.bits_for_comparison) - 1; + bool result = (n & comparison_mask) <= info.threshold; + n >>= info.shift_amount; + return result; +} + +// Computes floor(n / pow(10, N)) for small n and N. +// Precondition: n <= pow(10, N + 1). 
+template <int N> uint32_t small_division_by_pow10(uint32_t n) FMT_NOEXCEPT { + static constexpr struct { + uint32_t magic_number; + int shift_amount; + uint32_t divisor_times_10; + } infos[] = {{0xcccd, 19, 100}, {0xa3d8, 22, 1000}}; + constexpr auto info = infos[N - 1]; + FMT_ASSERT(n <= info.divisor_times_10, "n is too large"); + return n * info.magic_number >> info.shift_amount; +} + +// Computes floor(n / 10^(kappa + 1)) (float) +inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) FMT_NOEXCEPT { + return n / float_info<float>::big_divisor; +} +// Computes floor(n / 10^(kappa + 1)) (double) +inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) FMT_NOEXCEPT { + return umul128_upper64(n, 0x83126e978d4fdf3c) >> 9; +} + +// Various subroutines using pow10 cache +template <class T> struct cache_accessor; + +template <> struct cache_accessor<float> { + using carrier_uint = float_info<float>::carrier_uint; + using cache_entry_type = uint64_t; + + static uint64_t get_cached_power(int k) FMT_NOEXCEPT { + FMT_ASSERT(k >= float_info<float>::min_k && k <= float_info<float>::max_k, + "k is out of range"); + return data::dragonbox_pow10_significands_64[k - float_info<float>::min_k]; + } + + static carrier_uint compute_mul(carrier_uint u, + const cache_entry_type& cache) FMT_NOEXCEPT { + return umul96_upper32(u, cache); + } + + static uint32_t compute_delta(const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { + return static_cast<uint32_t>(cache >> (64 - 1 - beta_minus_1)); + } + + static bool compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { + FMT_ASSERT(beta_minus_1 >= 1, ""); + FMT_ASSERT(beta_minus_1 < 64, ""); + + return ((umul96_lower64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; + } + + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return static_cast<carrier_uint>( + (cache - (cache >> 
(float_info<float>::significand_bits + 2))) >> + (64 - float_info<float>::significand_bits - 1 - beta_minus_1)); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return static_cast<carrier_uint>( + (cache + (cache >> (float_info<float>::significand_bits + 1))) >> + (64 - float_info<float>::significand_bits - 1 - beta_minus_1)); + } + + static carrier_uint compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return (static_cast<carrier_uint>( + cache >> + (64 - float_info<float>::significand_bits - 2 - beta_minus_1)) + + 1) / + 2; + } +}; + +template <> struct cache_accessor<double> { + using carrier_uint = float_info<double>::carrier_uint; + using cache_entry_type = uint128_wrapper; + + static uint128_wrapper get_cached_power(int k) FMT_NOEXCEPT { + FMT_ASSERT(k >= float_info<double>::min_k && k <= float_info<double>::max_k, + "k is out of range"); + +#if FMT_USE_FULL_CACHE_DRAGONBOX + return data::dragonbox_pow10_significands_128[k - + float_info<double>::min_k]; +#else + static const int compression_ratio = 27; + + // Compute base index. + int cache_index = (k - float_info<double>::min_k) / compression_ratio; + int kb = cache_index * compression_ratio + float_info<double>::min_k; + int offset = k - kb; + + // Get base cache. + uint128_wrapper base_cache = + data::dragonbox_pow10_significands_128[cache_index]; + if (offset == 0) return base_cache; + + // Compute the required amount of bit-shift. + int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset; + FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected"); + + // Try to recover the real cache. + uint64_t pow5 = data::powers_of_5_64[offset]; + uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5); + uint128_wrapper middle_low = + umul128(base_cache.low() - (kb < 0 ? 
1u : 0u), pow5); + + recovered_cache += middle_low.high(); + + uint64_t high_to_middle = recovered_cache.high() << (64 - alpha); + uint64_t middle_to_low = recovered_cache.low() << (64 - alpha); + + recovered_cache = + uint128_wrapper{(recovered_cache.low() >> alpha) | high_to_middle, + ((middle_low.low() >> alpha) | middle_to_low)}; + + if (kb < 0) recovered_cache += 1; + + // Get error. + int error_idx = (k - float_info<double>::min_k) / 16; + uint32_t error = (data::dragonbox_pow10_recovery_errors[error_idx] >> + ((k - float_info<double>::min_k) % 16) * 2) & + 0x3; + + // Add the error back. + FMT_ASSERT(recovered_cache.low() + error >= recovered_cache.low(), ""); + return {recovered_cache.high(), recovered_cache.low() + error}; +#endif + } + + static carrier_uint compute_mul(carrier_uint u, + const cache_entry_type& cache) FMT_NOEXCEPT { + return umul192_upper64(u, cache); + } + + static uint32_t compute_delta(cache_entry_type const& cache, + int beta_minus_1) FMT_NOEXCEPT { + return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta_minus_1)); + } + + static bool compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { + FMT_ASSERT(beta_minus_1 >= 1, ""); + FMT_ASSERT(beta_minus_1 < 64, ""); - digits::result on_start(uint64_t, uint64_t, uint64_t, int&) { - return digits::more; + return ((umul192_middle64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; } - // Decrement the generated number approaching value from above. 
- void round(uint64_t d, uint64_t divisor, uint64_t& remainder, - uint64_t error) { - while ( - remainder < d && error - remainder >= divisor && - (remainder + divisor < d || d - remainder >= remainder + divisor - d)) { - --buf[size - 1]; - remainder += divisor; + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return (cache.high() - + (cache.high() >> (float_info<double>::significand_bits + 2))) >> + (64 - float_info<double>::significand_bits - 1 - beta_minus_1); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return (cache.high() + + (cache.high() >> (float_info<double>::significand_bits + 1))) >> + (64 - float_info<double>::significand_bits - 1 - beta_minus_1); + } + + static carrier_uint compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return ((cache.high() >> + (64 - float_info<double>::significand_bits - 2 - beta_minus_1)) + + 1) / + 2; + } +}; + +// Various integer checks +template <class T> +bool is_left_endpoint_integer_shorter_interval(int exponent) FMT_NOEXCEPT { + return exponent >= + float_info< + T>::case_shorter_interval_left_endpoint_lower_threshold && + exponent <= + float_info<T>::case_shorter_interval_left_endpoint_upper_threshold; +} +template <class T> +bool is_endpoint_integer(typename float_info<T>::carrier_uint two_f, + int exponent, int minus_k) FMT_NOEXCEPT { + if (exponent < float_info<T>::case_fc_pm_half_lower_threshold) return false; + // For k >= 0. + if (exponent <= float_info<T>::case_fc_pm_half_upper_threshold) return true; + // For k < 0. 
+ if (exponent > float_info<T>::divisibility_check_by_5_threshold) return false; + return divisible_by_power_of_5(two_f, minus_k); +} + +template <class T> +bool is_center_integer(typename float_info<T>::carrier_uint two_f, int exponent, + int minus_k) FMT_NOEXCEPT { + // Exponent for 5 is negative. + if (exponent > float_info<T>::divisibility_check_by_5_threshold) return false; + if (exponent > float_info<T>::case_fc_upper_threshold) + return divisible_by_power_of_5(two_f, minus_k); + // Both exponents are nonnegative. + if (exponent >= float_info<T>::case_fc_lower_threshold) return true; + // Exponent for 2 is negative. + return divisible_by_power_of_2(two_f, minus_k - exponent + 1); +} + +// Remove trailing zeros from n and return the number of zeros removed (float) +FMT_ALWAYS_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT { +#ifdef FMT_BUILTIN_CTZ + int t = FMT_BUILTIN_CTZ(n); +#else + int t = ctz(n); +#endif + if (t > float_info<float>::max_trailing_zeros) + t = float_info<float>::max_trailing_zeros; + + const uint32_t mod_inv1 = 0xcccccccd; + const uint32_t max_quotient1 = 0x33333333; + const uint32_t mod_inv2 = 0xc28f5c29; + const uint32_t max_quotient2 = 0x0a3d70a3; + + int s = 0; + for (; s < t - 1; s += 2) { + if (n * mod_inv2 > max_quotient2) break; + n *= mod_inv2; + } + if (s < t && n * mod_inv1 <= max_quotient1) { + n *= mod_inv1; + ++s; + } + n >>= s; + return s; +} + +// Removes trailing zeros and returns the number of zeros removed (double) +FMT_ALWAYS_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT { +#ifdef FMT_BUILTIN_CTZLL + int t = FMT_BUILTIN_CTZLL(n); +#else + int t = ctzll(n); +#endif + if (t > float_info<double>::max_trailing_zeros) + t = float_info<double>::max_trailing_zeros; + // Divide by 10^8 and reduce to 32-bits + // Since ret_value.significand <= (2^64 - 1) / 1000 < 10^17, + // both of the quotient and the r should fit in 32-bits + + const uint32_t mod_inv1 = 0xcccccccd; + const uint32_t max_quotient1 = 
0x33333333; + const uint64_t mod_inv8 = 0xc767074b22e90e21; + const uint64_t max_quotient8 = 0x00002af31dc46118; + + // If the number is divisible by 1'0000'0000, work with the quotient + if (t >= 8) { + auto quotient_candidate = n * mod_inv8; + + if (quotient_candidate <= max_quotient8) { + auto quotient = static_cast<uint32_t>(quotient_candidate >> 8); + + int s = 8; + for (; s < t; ++s) { + if (quotient * mod_inv1 > max_quotient1) break; + quotient *= mod_inv1; + } + quotient >>= (s - 8); + n = quotient; + return s; } } - // Implements Grisu's round_weed. - digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, - uint64_t error, int exp, bool integral) { - buf[size++] = digit; - if (remainder >= error) return digits::more; - uint64_t unit = integral ? 1 : data::powers_of_10_64[-exp]; - uint64_t up = (diff - 1) * unit; // wp_Wup - round(up, divisor, remainder, error); - uint64_t down = (diff + 1) * unit; // wp_Wdown - if (remainder < down && error - remainder >= divisor && - (remainder + divisor < down || - down - remainder > remainder + divisor - down)) { - return digits::error; + // Otherwise, work with the remainder + auto quotient = static_cast<uint32_t>(n / 100000000); + auto remainder = static_cast<uint32_t>(n - 100000000 * quotient); + + if (t == 0 || remainder * mod_inv1 > max_quotient1) { + return 0; + } + remainder *= mod_inv1; + + if (t == 1 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 1) + quotient * 10000000ull; + return 1; + } + remainder *= mod_inv1; + + if (t == 2 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 2) + quotient * 1000000ull; + return 2; + } + remainder *= mod_inv1; + + if (t == 3 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 3) + quotient * 100000ull; + return 3; + } + remainder *= mod_inv1; + + if (t == 4 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 4) + quotient * 10000ull; + return 4; + } + remainder *= mod_inv1; + + if (t == 5 || remainder * 
mod_inv1 > max_quotient1) { + n = (remainder >> 5) + quotient * 1000ull; + return 5; + } + remainder *= mod_inv1; + + if (t == 6 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 6) + quotient * 100ull; + return 6; + } + remainder *= mod_inv1; + + n = (remainder >> 7) + quotient * 10ull; + return 7; +} + +// The main algorithm for shorter interval case +template <class T> +FMT_ALWAYS_INLINE FMT_SAFEBUFFERS decimal_fp<T> shorter_interval_case( + int exponent) FMT_NOEXCEPT { + decimal_fp<T> ret_value; + // Compute k and beta + const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent); + const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k); + + // Compute xi and zi + using cache_entry_type = typename cache_accessor<T>::cache_entry_type; + const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k); + + auto xi = cache_accessor<T>::compute_left_endpoint_for_shorter_interval_case( + cache, beta_minus_1); + auto zi = cache_accessor<T>::compute_right_endpoint_for_shorter_interval_case( + cache, beta_minus_1); + + // If the left endpoint is not an integer, increase it + if (!is_left_endpoint_integer_shorter_interval<T>(exponent)) ++xi; + + // Try bigger divisor + ret_value.significand = zi / 10; + + // If succeed, remove trailing zeros if necessary and return + if (ret_value.significand * 10 >= xi) { + ret_value.exponent = minus_k + 1; + ret_value.exponent += remove_trailing_zeros(ret_value.significand); + return ret_value; + } + + // Otherwise, compute the round-up of y + ret_value.significand = + cache_accessor<T>::compute_round_up_for_shorter_interval_case( + cache, beta_minus_1); + ret_value.exponent = minus_k; + + // When tie occurs, choose one of them according to the rule + if (exponent >= float_info<T>::shorter_interval_tie_lower_threshold && + exponent <= float_info<T>::shorter_interval_tie_upper_threshold) { + ret_value.significand = ret_value.significand % 2 == 0 + ? 
ret_value.significand + : ret_value.significand - 1; + } else if (ret_value.significand < xi) { + ++ret_value.significand; + } + return ret_value; +} + +template <typename T> +FMT_SAFEBUFFERS decimal_fp<T> to_decimal(T x) FMT_NOEXCEPT { + // Step 1: integer promotion & Schubfach multiplier calculation. + + using carrier_uint = typename float_info<T>::carrier_uint; + using cache_entry_type = typename cache_accessor<T>::cache_entry_type; + auto br = bit_cast<carrier_uint>(x); + + // Extract significand bits and exponent bits. + const carrier_uint significand_mask = + (static_cast<carrier_uint>(1) << float_info<T>::significand_bits) - 1; + carrier_uint significand = (br & significand_mask); + int exponent = static_cast<int>((br & exponent_mask<T>()) >> + float_info<T>::significand_bits); + + if (exponent != 0) { // Check if normal. + exponent += float_info<T>::exponent_bias - float_info<T>::significand_bits; + + // Shorter interval case; proceed like Schubfach. + if (significand == 0) return shorter_interval_case<T>(exponent); + + significand |= + (static_cast<carrier_uint>(1) << float_info<T>::significand_bits); + } else { + // Subnormal case; the interval is always regular. + if (significand == 0) return {0, 0}; + exponent = float_info<T>::min_exponent - float_info<T>::significand_bits; + } + + const bool include_left_endpoint = (significand % 2 == 0); + const bool include_right_endpoint = include_left_endpoint; + + // Compute k and beta. 
+ const int minus_k = floor_log10_pow2(exponent) - float_info<T>::kappa; + const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k); + const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k); + + // Compute zi and deltai + // 10^kappa <= deltai < 10^(kappa + 1) + const uint32_t deltai = cache_accessor<T>::compute_delta(cache, beta_minus_1); + const carrier_uint two_fc = significand << 1; + const carrier_uint two_fr = two_fc | 1; + const carrier_uint zi = + cache_accessor<T>::compute_mul(two_fr << beta_minus_1, cache); + + // Step 2: Try larger divisor; remove trailing zeros if necessary + + // Using an upper bound on zi, we might be able to optimize the division + // better than the compiler; we are computing zi / big_divisor here + decimal_fp<T> ret_value; + ret_value.significand = divide_by_10_to_kappa_plus_1(zi); + uint32_t r = static_cast<uint32_t>(zi - float_info<T>::big_divisor * + ret_value.significand); + + if (r > deltai) { + goto small_divisor_case_label; + } else if (r < deltai) { + // Exclude the right endpoint if necessary + if (r == 0 && !include_right_endpoint && + is_endpoint_integer<T>(two_fr, exponent, minus_k)) { + --ret_value.significand; + r = float_info<T>::big_divisor; + goto small_divisor_case_label; + } + } else { + // r == deltai; compare fractional parts + // Check conditions in the order different from the paper + // to take advantage of short-circuiting + const carrier_uint two_fl = two_fc - 1; + if ((!include_left_endpoint || + !is_endpoint_integer<T>(two_fl, exponent, minus_k)) && + !cache_accessor<T>::compute_mul_parity(two_fl, cache, beta_minus_1)) { + goto small_divisor_case_label; } - return 2 * unit <= remainder && remainder <= error - 4 * unit - ? 
digits::done - : digits::error; } -}; + ret_value.exponent = minus_k + float_info<T>::kappa + 1; + + // We may need to remove trailing zeros + ret_value.exponent += remove_trailing_zeros(ret_value.significand); + return ret_value; + + // Step 3: Find the significand with the smaller divisor + +small_divisor_case_label: + ret_value.significand *= 10; + ret_value.exponent = minus_k + float_info<T>::kappa; + + const uint32_t mask = (1u << float_info<T>::kappa) - 1; + auto dist = r - (deltai / 2) + (float_info<T>::small_divisor / 2); + + // Is dist divisible by 2^kappa? + if ((dist & mask) == 0) { + const bool approx_y_parity = + ((dist ^ (float_info<T>::small_divisor / 2)) & 1) != 0; + dist >>= float_info<T>::kappa; + + // Is dist divisible by 5^kappa? + if (check_divisibility_and_divide_by_pow5<float_info<T>::kappa>(dist)) { + ret_value.significand += dist; + + // Check z^(f) >= epsilon^(f) + // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1, + // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f) + // Since there are only 2 possibilities, we only need to care about the + // parity. Also, zi and r should have the same parity since the divisor + // is an even number + if (cache_accessor<T>::compute_mul_parity(two_fc, cache, beta_minus_1) != + approx_y_parity) { + --ret_value.significand; + } else { + // If z^(f) >= epsilon^(f), we might have a tie + // when z^(f) == epsilon^(f), or equivalently, when y is an integer + if (is_center_integer<T>(two_fc, exponent, minus_k)) { + ret_value.significand = ret_value.significand % 2 == 0 + ? ret_value.significand + : ret_value.significand - 1; + } + } + } + // Is dist not divisible by 5^kappa? + else { + ret_value.significand += dist; + } + } + // Is dist not divisible by 2^kappa? 
+ else { + // Since we know dist is small, we might be able to optimize the division + // better than the compiler; we are computing dist / small_divisor here + ret_value.significand += + small_division_by_pow10<float_info<T>::kappa>(dist); + } + return ret_value; +} +} // namespace dragonbox // Formats value using a variation of the Fixed-Precision Positive // Floating-Point Printout ((FPP)^2) algorithm by Steele & White: // https://fmt.dev/p372-steele.pdf. template <typename Double> -void fallback_format(Double d, buffer<char>& buf, int& exp10) { +void fallback_format(Double d, int num_digits, bool binary32, buffer<char>& buf, + int& exp10) { bigint numerator; // 2 * R in (FPP)^2. bigint denominator; // 2 * S in (FPP)^2. // lower and upper are differences between value and corresponding boundaries. @@ -998,8 +2322,9 @@ void fallback_format(Double d, buffer<char>& buf, int& exp10) { // Shift numerator and denominator by an extra bit or two (if lower boundary // is closer) to make lower and upper integers. This eliminates multiplication // by 2 during later computations. - // TODO: handle float - int shift = value.assign(d) ? 2 : 1; + const bool is_predecessor_closer = + binary32 ? value.assign(static_cast<float>(d)) : value.assign(d); + int shift = is_predecessor_closer ? 2 : 1; uint64_t significand = value.f << shift; if (value.e >= 0) { numerator.assign(significand); @@ -1012,7 +2337,7 @@ void fallback_format(Double d, buffer<char>& buf, int& exp10) { upper = &upper_store; } denominator.assign_pow10(exp10); - denominator <<= 1; + denominator <<= shift; } else if (exp10 < 0) { numerator.assign_pow10(-exp10); lower.assign(numerator); @@ -1034,39 +2359,73 @@ void fallback_format(Double d, buffer<char>& buf, int& exp10) { upper = &upper_store; } } - if (!upper) upper = &lower; // Invariant: value == (numerator / denominator) * pow(10, exp10). 
- bool even = (value.f & 1) == 0; - int num_digits = 0; - char* data = buf.data(); - for (;;) { - int digit = numerator.divmod_assign(denominator); - bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. - // numerator + upper >[=] pow10: - bool high = add_compare(numerator, *upper, denominator) + even > 0; - data[num_digits++] = static_cast<char>('0' + digit); - if (low || high) { - if (!low) { - ++data[num_digits - 1]; - } else if (high) { - int result = add_compare(numerator, numerator, denominator); - // Round half to even. - if (result > 0 || (result == 0 && (digit % 2) != 0)) + if (num_digits < 0) { + // Generate the shortest representation. + if (!upper) upper = &lower; + bool even = (value.f & 1) == 0; + num_digits = 0; + char* data = buf.data(); + for (;;) { + int digit = numerator.divmod_assign(denominator); + bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. + // numerator + upper >[=] pow10: + bool high = add_compare(numerator, *upper, denominator) + even > 0; + data[num_digits++] = static_cast<char>('0' + digit); + if (low || high) { + if (!low) { ++data[num_digits - 1]; + } else if (high) { + int result = add_compare(numerator, numerator, denominator); + // Round half to even. + if (result > 0 || (result == 0 && (digit % 2) != 0)) + ++data[num_digits - 1]; + } + buf.try_resize(to_unsigned(num_digits)); + exp10 -= num_digits - 1; + return; } - buf.resize(to_unsigned(num_digits)); - exp10 -= num_digits - 1; - return; + numerator *= 10; + lower *= 10; + if (upper != &lower) *upper *= 10; } + } + // Generate the given number of digits. + exp10 -= num_digits - 1; + if (num_digits == 0) { + buf.try_resize(1); + denominator *= 10; + buf[0] = add_compare(numerator, numerator, denominator) > 0 ? 
'1' : '0'; + return; + } + buf.try_resize(to_unsigned(num_digits)); + for (int i = 0; i < num_digits - 1; ++i) { + int digit = numerator.divmod_assign(denominator); + buf[i] = static_cast<char>('0' + digit); numerator *= 10; - lower *= 10; - if (upper != &lower) *upper *= 10; } + int digit = numerator.divmod_assign(denominator); + auto result = add_compare(numerator, numerator, denominator); + if (result > 0 || (result == 0 && (digit % 2) != 0)) { + if (digit == 9) { + const auto overflow = '0' + 10; + buf[num_digits - 1] = overflow; + // Propagate the carry. + for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] == overflow) { + buf[0] = '1'; + ++exp10; + } + return; + } + ++digit; + } + buf[num_digits - 1] = static_cast<char>('0' + digit); } -// Formats value using the Grisu algorithm -// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf) -// if T is a IEEE754 binary32 or binary64 and snprintf otherwise. template <typename T> int format_float(T value, int precision, float_specs specs, buffer<char>& buf) { static_assert(!std::is_same<T, float>::value, ""); @@ -1078,66 +2437,57 @@ int format_float(T value, int precision, float_specs specs, buffer<char>& buf) { buf.push_back('0'); return 0; } - buf.resize(to_unsigned(precision)); + buf.try_resize(to_unsigned(precision)); std::uninitialized_fill_n(buf.data(), precision, '0'); return -precision; } if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf); + if (precision < 0) { + // Use Dragonbox for the shortest format. 
+ if (specs.binary32) { + auto dec = dragonbox::to_decimal(static_cast<float>(value)); + write<char>(buffer_appender<char>(buf), dec.significand); + return dec.exponent; + } + auto dec = dragonbox::to_decimal(static_cast<double>(value)); + write<char>(buffer_appender<char>(buf), dec.significand); + return dec.exponent; + } + + // Use Grisu + Dragon4 for the given precision: + // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. int exp = 0; const int min_exp = -60; // alpha in Grisu. int cached_exp10 = 0; // K in Grisu. - if (precision < 0) { - fp fp_value; - auto boundaries = specs.binary32 - ? fp_value.assign_float_with_boundaries(value) - : fp_value.assign_with_boundaries(value); - fp_value = normalize(fp_value); - // Find a cached power of 10 such that multiplying value by it will bring - // the exponent in the range [min_exp, -32]. - const fp cached_pow = get_cached_power( - min_exp - (fp_value.e + fp::significand_size), cached_exp10); - // Multiply value and boundaries by the cached power of 10. - fp_value = fp_value * cached_pow; - boundaries.lower = multiply(boundaries.lower, cached_pow.f); - boundaries.upper = multiply(boundaries.upper, cached_pow.f); - assert(min_exp <= fp_value.e && fp_value.e <= -32); - --boundaries.lower; // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}. - ++boundaries.upper; // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}. - // Numbers outside of (lower, upper) definitely do not round to value. 
- grisu_shortest_handler handler{buf.data(), 0, - boundaries.upper - fp_value.f}; - auto result = - grisu_gen_digits(fp(boundaries.upper, fp_value.e), - boundaries.upper - boundaries.lower, exp, handler); - if (result == digits::error) { - exp += handler.size - cached_exp10 - 1; - fallback_format(value, buf, exp); - return exp; - } - buf.resize(to_unsigned(handler.size)); + fp normalized = normalize(fp(value)); + const auto cached_pow = get_cached_power( + min_exp - (normalized.e + fp::significand_size), cached_exp10); + normalized = normalized * cached_pow; + // Limit precision to the maximum possible number of significant digits in an + // IEEE754 double because we don't need to generate zeros. + const int max_double_digits = 767; + if (precision > max_double_digits) precision = max_double_digits; + fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; + if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) { + exp += handler.size - cached_exp10 - 1; + fallback_format(value, handler.precision, specs.binary32, buf, exp); } else { - if (precision > 17) return snprintf_float(value, precision, specs, buf); - fp normalized = normalize(fp(value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::significand_size), cached_exp10); - normalized = normalized * cached_pow; - fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) - return snprintf_float(value, precision, specs, buf); - int num_digits = handler.size; - if (!fixed) { - // Remove trailing zeros. - while (num_digits > 0 && buf[num_digits - 1] == '0') { - --num_digits; - ++exp; - } + exp += handler.exp10; + buf.try_resize(to_unsigned(handler.size)); + } + if (!fixed && !specs.showpoint) { + // Remove trailing zeros. 
+ auto num_digits = buf.size(); + while (num_digits > 0 && buf[num_digits - 1] == '0') { + --num_digits; + ++exp; } - buf.resize(to_unsigned(num_digits)); + buf.try_resize(num_digits); } - return exp - cached_exp10; -} + return exp; +} // namespace detail template <typename T> int snprintf_float(T value, int precision, float_specs specs, @@ -1185,19 +2535,20 @@ int snprintf_float(T value, int precision, float_specs specs, ? snprintf_ptr(begin, capacity, format, precision, value) : snprintf_ptr(begin, capacity, format, value); if (result < 0) { - buf.reserve(buf.capacity() + 1); // The buffer will grow exponentially. + // The buffer will grow exponentially. + buf.try_reserve(buf.capacity() + 1); continue; } auto size = to_unsigned(result); // Size equal to capacity means that the last character was truncated. if (size >= capacity) { - buf.reserve(size + offset + 1); // Add 1 for the terminating '\0'. + buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'. continue; } auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; if (specs.format == float_format::fixed) { if (precision == 0) { - buf.resize(size); + buf.try_resize(size); return 0; } // Find and remove the decimal point. @@ -1207,11 +2558,11 @@ int snprintf_float(T value, int precision, float_specs specs, } while (is_digit(*p)); int fraction_size = static_cast<int>(end - p - 1); std::memmove(p, p + 1, to_unsigned(fraction_size)); - buf.resize(size - 1); + buf.try_resize(size - 1); return -fraction_size; } if (specs.format == float_format::hex) { - buf.resize(size + offset); + buf.try_resize(size + offset); return 0; } // Find and parse the exponent. 
@@ -1237,7 +2588,7 @@ int snprintf_float(T value, int precision, float_specs specs, fraction_size = static_cast<int>(fraction_end - begin - 1); std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size)); } - buf.resize(to_unsigned(fraction_size) + offset + 1); + buf.try_resize(to_unsigned(fraction_size) + offset + 1); return exp - fraction_size; } } @@ -1259,25 +2610,18 @@ int snprintf_float(T value, int precision, float_specs specs, * occurs, this pointer will be a guess that depends on the particular * error, but it will always advance at least one byte. */ -FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) { - static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; +inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) { static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; static const int shiftc[] = {0, 18, 12, 6, 0}; static const int shifte[] = {0, 6, 4, 2, 0}; - auto s = reinterpret_cast<const unsigned char*>(buf); - int len = lengths[s[0] >> 3]; - - // Compute the pointer to the next character early so that the next - // iteration can start working on the next character. Neither Clang - // nor GCC figure out this reordering on their own. - const char* next = buf + len + !len; + int len = code_point_length(buf); + const char* next = buf + len; // Assume a four-byte character and load four bytes. Unused bits are // shifted out. 
+ auto s = reinterpret_cast<const unsigned char*>(buf); *c = uint32_t(s[0] & masks[len]) << 18; *c |= uint32_t(s[1] & 0x3f) << 12; *c |= uint32_t(s[2] & 0x3f) << 6; @@ -1296,6 +2640,19 @@ FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) { return next; } + +struct stringifier { + template <typename T> FMT_INLINE std::string operator()(T value) const { + return to_string(value); + } + std::string operator()(basic_format_arg<format_context>::handle h) const { + memory_buffer buf; + format_parse_context parse_ctx({}); + format_context format_ctx(buffer_appender<char>(buf), {}, {}); + h.format(parse_ctx, format_ctx); + return to_string(buf); + } +}; } // namespace detail template <> struct formatter<detail::bigint> { @@ -1363,7 +2720,8 @@ FMT_FUNC void format_system_error(detail::buffer<char>& out, int error_code, int result = detail::safe_strerror(error_code, system_message, buf.size()); if (result == 0) { - format_to(std::back_inserter(out), "{}: {}", message, system_message); + format_to(detail::buffer_appender<char>(out), "{}: {}", message, + system_message); return; } if (result != ERANGE) @@ -1384,20 +2742,6 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -struct stringifier { - template <typename T> FMT_INLINE std::string operator()(T value) const { - return to_string(value); - } - std::string operator()(basic_format_arg<format_context>::handle h) const { - memory_buffer buf; - detail::buffer<char>& base = buf; - format_parse_context parse_ctx({}); - format_context format_ctx(std::back_inserter(base), {}, {}); - h.format(parse_ctx, format_ctx); - return to_string(buf); - } -}; - FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) { if (format_str.size() == 2 && equal2(format_str.data(), "{}")) { auto arg = args.get(0); @@ -1409,6 +2753,14 @@ FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) { return to_string(buffer); } 
+#ifdef _WIN32 +namespace detail { +using dword = conditional_t<sizeof(long) == 4, unsigned long, unsigned>; +extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // + void*, const void*, dword, dword*, void*); +} // namespace detail +#endif + FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { memory_buffer buffer; detail::vformat_to(buffer, format_str, @@ -1417,10 +2769,10 @@ FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { auto fd = _fileno(f); if (_isatty(fd)) { detail::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size())); - auto written = DWORD(); - if (!WriteConsoleW(reinterpret_cast<HANDLE>(_get_osfhandle(fd)), - u16.c_str(), static_cast<DWORD>(u16.size()), &written, - nullptr)) { + auto written = detail::dword(); + if (!detail::WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)), + u16.c_str(), static_cast<uint32_t>(u16.size()), + &written, nullptr)) { FMT_THROW(format_error("failed to write to console")); } return; @@ -1446,8 +2798,4 @@ FMT_FUNC void vprint(string_view format_str, format_args args) { FMT_END_NAMESPACE -#ifdef _MSC_VER -# pragma warning(pop) -#endif - #endif // FMT_FORMAT_INL_H_ diff --git a/src/third_party/fmt/format.h b/src/third_party/fmt/format.h index 17509b7..1a037b0 100644 --- a/src/third_party/fmt/format.h +++ b/src/third_party/fmt/format.h @@ -70,9 +70,11 @@ #endif #if __cplusplus == 201103L || __cplusplus == 201402L -# if defined(__clang__) +# if defined(__INTEL_COMPILER) || defined(__PGI) +# define FMT_FALLTHROUGH +# elif defined(__clang__) # define FMT_FALLTHROUGH [[clang::fallthrough]] -# elif FMT_GCC_VERSION >= 700 && !defined(__PGI) && \ +# elif FMT_GCC_VERSION >= 700 && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) # define FMT_FALLTHROUGH [[gnu::fallthrough]] # else @@ -139,12 +141,13 @@ FMT_END_NAMESPACE #endif #ifndef FMT_USE_UDL_TEMPLATE -// EDG frontend based compilers (icc, nvcc, etc) and GCC < 6.4 do not properly -// support UDL 
templates and GCC >= 9 warns about them. +// EDG frontend based compilers (icc, nvcc, PGI, etc) and GCC < 6.4 do not +// properly support UDL templates and GCC >= 9 warns about them. # if FMT_USE_USER_DEFINED_LITERALS && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 501) && \ ((FMT_GCC_VERSION >= 604 && __cplusplus >= 201402L) || \ - FMT_CLANG_VERSION >= 304) + FMT_CLANG_VERSION >= 304) && \ + !defined(__PGI) && !defined(__NVCC__) # define FMT_USE_UDL_TEMPLATE 1 # else # define FMT_USE_UDL_TEMPLATE 0 @@ -163,6 +166,14 @@ FMT_END_NAMESPACE # define FMT_USE_LONG_DOUBLE 1 #endif +// Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of +// int_writer template instances to just one by only using the largest integer +// type. This results in a reduction in binary size but will cause a decrease in +// integer formatting performance. +#if !defined(FMT_REDUCE_INT_INSTANTIATIONS) +# define FMT_REDUCE_INT_INSTANTIATIONS 0 +#endif + // __builtin_clz is broken in clang with Microsoft CodeGen: // https://github.com/fmtlib/fmt/issues/519 #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER @@ -171,56 +182,87 @@ FMT_END_NAMESPACE #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER # define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) #endif +#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctz)) +# define FMT_BUILTIN_CTZ(n) __builtin_ctz(n) +#endif +#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctzll)) +# define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n) +#endif + +#if FMT_MSC_VER +# include <intrin.h> // _BitScanReverse[64], _BitScanForward[64], _umul128 +#endif // Some compilers masquerade as both MSVC and GCC-likes or otherwise support // __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the // MSVC intrinsics if the clz and clzll builtins are not available. 
-#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED) -# include <intrin.h> // _BitScanReverse, _BitScanReverse64 - +#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && \ + !defined(FMT_BUILTIN_CTZLL) && !defined(_MANAGED) FMT_BEGIN_NAMESPACE namespace detail { // Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. # ifndef __clang__ +# pragma intrinsic(_BitScanForward) # pragma intrinsic(_BitScanReverse) # endif -inline uint32_t clz(uint32_t x) { +# if defined(_WIN64) && !defined(__clang__) +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif + +inline int clz(uint32_t x) { unsigned long r = 0; _BitScanReverse(&r, x); - FMT_ASSERT(x != 0, ""); // Static analysis complains about using uninitialized data // "r", but the only way that can happen is if "x" is 0, // which the callers guarantee to not happen. FMT_SUPPRESS_MSC_WARNING(6102) - return 31 - r; + return 31 ^ static_cast<int>(r); } # define FMT_BUILTIN_CLZ(n) detail::clz(n) -# if defined(_WIN64) && !defined(__clang__) -# pragma intrinsic(_BitScanReverse64) -# endif - -inline uint32_t clzll(uint64_t x) { +inline int clzll(uint64_t x) { unsigned long r = 0; # ifdef _WIN64 _BitScanReverse64(&r, x); # else // Scan the high 32 bits. - if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32))) return 63 - (r + 32); - + if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32))) return 63 ^ (r + 32); // Scan the low 32 bits. _BitScanReverse(&r, static_cast<uint32_t>(x)); # endif - FMT_ASSERT(x != 0, ""); - // Static analysis complains about using uninitialized data - // "r", but the only way that can happen is if "x" is 0, - // which the callers guarantee to not happen. - FMT_SUPPRESS_MSC_WARNING(6102) - return 63 - r; + FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. 
+ return 63 ^ static_cast<int>(r); } # define FMT_BUILTIN_CLZLL(n) detail::clzll(n) + +inline int ctz(uint32_t x) { + unsigned long r = 0; + _BitScanForward(&r, x); + FMT_ASSERT(x != 0, ""); + FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. + return static_cast<int>(r); +} +# define FMT_BUILTIN_CTZ(n) detail::ctz(n) + +inline int ctzll(uint64_t x) { + unsigned long r = 0; + FMT_ASSERT(x != 0, ""); + FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. +# ifdef _WIN64 + _BitScanForward64(&r, x); +# else + // Scan the low 32 bits. + if (_BitScanForward(&r, static_cast<uint32_t>(x))) return static_cast<int>(r); + // Scan the high 32 bits. + _BitScanForward(&r, static_cast<uint32_t>(x >> 32)); + r += 32; +# endif + return static_cast<int>(r); +} +# define FMT_BUILTIN_CTZLL(n) detail::ctzll(n) } // namespace detail FMT_END_NAMESPACE #endif @@ -298,50 +340,11 @@ FMT_INLINE void assume(bool condition) { #endif } -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. -template <typename... Ts> struct void_t_impl { using type = void; }; - -template <typename... Ts> -using void_t = typename detail::void_t_impl<Ts...>::type; - // An approximation of iterator_t for pre-C++20 systems. template <typename T> using iterator_t = decltype(std::begin(std::declval<T&>())); template <typename T> using sentinel_t = decltype(std::end(std::declval<T&>())); -// Detect the iterator category of *any* given type in a SFINAE-friendly way. -// Unfortunately, older implementations of std::iterator_traits are not safe -// for use in a SFINAE-context. 
-template <typename It, typename Enable = void> -struct iterator_category : std::false_type {}; - -template <typename T> struct iterator_category<T*> { - using type = std::random_access_iterator_tag; -}; - -template <typename It> -struct iterator_category<It, void_t<typename It::iterator_category>> { - using type = typename It::iterator_category; -}; - -// Detect if *any* given type models the OutputIterator concept. -template <typename It> class is_output_iterator { - // Check for mutability because all iterator categories derived from - // std::input_iterator_tag *may* also meet the requirements of an - // OutputIterator, thereby falling into the category of 'mutable iterators' - // [iterator.requirements.general] clause 4. The compiler reveals this - // property only at the point of *actually dereferencing* the iterator! - template <typename U> - static decltype(*(std::declval<U>())) test(std::input_iterator_tag); - template <typename U> static char& test(std::output_iterator_tag); - template <typename U> static const char& test(...); - - using type = decltype(test<It>(typename iterator_category<It>::type{})); - - public: - enum { value = !std::is_const<remove_reference_t<type>>::value }; -}; - // A workaround for std::string not having mutable data() until C++17. 
template <typename Char> inline Char* get_data(std::basic_string<Char>& s) { return &s[0]; @@ -374,10 +377,29 @@ reserve(std::back_insert_iterator<Container> it, size_t n) { return make_checked(get_data(c) + size, n); } +template <typename T> +inline buffer_appender<T> reserve(buffer_appender<T> it, size_t n) { + buffer<T>& buf = get_container(it); + buf.try_reserve(buf.size() + n); + return it; +} + template <typename Iterator> inline Iterator& reserve(Iterator& it, size_t) { return it; } +template <typename T, typename OutputIt> +constexpr T* to_pointer(OutputIt, size_t) { + return nullptr; +} +template <typename T> T* to_pointer(buffer_appender<T> it, size_t n) { + buffer<T>& buf = get_container(it); + auto size = buf.size(); + if (buf.capacity() < size + n) return nullptr; + buf.try_resize(size + n); + return buf.data() + size; +} + template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)> inline std::back_insert_iterator<Container> base_iterator( std::back_insert_iterator<Container>& it, @@ -415,13 +437,17 @@ class counting_iterator { ++count_; return *this; } - counting_iterator operator++(int) { auto it = *this; ++*this; return it; } + friend counting_iterator operator+(counting_iterator it, difference_type n) { + it.count_ += static_cast<size_t>(n); + return it; + } + value_type operator*() const { return {}; } }; @@ -555,23 +581,38 @@ OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { [](char c) { return static_cast<char8_type>(c); }); } -#ifndef FMT_USE_GRISU -# define FMT_USE_GRISU 1 -#endif - -template <typename T> constexpr bool use_grisu() { - return FMT_USE_GRISU && std::numeric_limits<double>::is_iec559 && - sizeof(T) <= sizeof(double); +template <typename Char, typename InputIt> +inline counting_iterator copy_str(InputIt begin, InputIt end, + counting_iterator it) { + return it + (end - begin); } template <typename T> +using is_fast_float = bool_constant<std::numeric_limits<T>::is_iec559 && + sizeof(T) <= 
sizeof(double)>; + +#ifndef FMT_USE_FULL_CACHE_DRAGONBOX +# define FMT_USE_FULL_CACHE_DRAGONBOX 0 +#endif + +template <typename T> template <typename U> void buffer<T>::append(const U* begin, const U* end) { - size_t new_size = size_ + to_unsigned(end - begin); - reserve(new_size); - std::uninitialized_copy(begin, end, - make_checked(ptr_ + size_, capacity_ - size_)); - size_ = new_size; + do { + auto count = to_unsigned(end - begin); + try_reserve(size_ + count); + auto free_cap = capacity_ - size_; + if (free_cap < count) count = free_cap; + std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); + size_ += count; + begin += count; + } while (begin != end); +} + +template <typename OutputIt, typename T, typename Traits> +void iterator_buffer<OutputIt, T, Traits>::flush() { + out_ = std::copy_n(data_, this->limit(this->size()), out_); + this->clear(); } } // namespace detail @@ -610,7 +651,7 @@ enum { inline_buffer_size = 500 }; */ template <typename T, size_t SIZE = inline_buffer_size, typename Allocator = std::allocator<T>> -class basic_memory_buffer : public detail::buffer<T> { +class basic_memory_buffer final : public detail::buffer<T> { private: T store_[SIZE]; @@ -624,7 +665,7 @@ class basic_memory_buffer : public detail::buffer<T> { } protected: - void grow(size_t size) FMT_OVERRIDE; + void grow(size_t size) final FMT_OVERRIDE; public: using value_type = T; @@ -634,7 +675,7 @@ class basic_memory_buffer : public detail::buffer<T> { : alloc_(alloc) { this->set(store_, SIZE); } - ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); } + ~basic_memory_buffer() { deallocate(); } private: // Move data from other to this buffer. @@ -678,6 +719,22 @@ class basic_memory_buffer : public detail::buffer<T> { // Returns a copy of the allocator associated with this buffer. Allocator get_allocator() const { return alloc_; } + + /** + Resizes the buffer to contain *count* elements. If T is a POD type new + elements may not be initialized. 
+ */ + void resize(size_t count) { this->try_resize(count); } + + /** Increases the buffer capacity to *new_capacity*. */ + void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } + + // Directly append data into the buffer + using detail::buffer<T>::append; + template <typename ContiguousRange> + void append(const ContiguousRange& range) { + append(range.data(), range.data() + range.size()); + } }; template <typename T, size_t SIZE, typename Allocator> @@ -748,19 +805,81 @@ FMT_CONSTEXPR bool is_supported_floating_point(T) { } // Smallest of uint32_t, uint64_t, uint128_t that is large enough to -// represent all values of T. +// represent all values of an integral type T. template <typename T> using uint32_or_64_or_128_t = - conditional_t<num_bits<T>() <= 32, uint32_t, + conditional_t<num_bits<T>() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS, + uint32_t, conditional_t<num_bits<T>() <= 64, uint64_t, uint128_t>>; +// 128-bit integer type used internally +struct FMT_EXTERN_TEMPLATE_API uint128_wrapper { + uint128_wrapper() = default; + +#if FMT_USE_INT128 + uint128_t internal_; + + uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT + : internal_{static_cast<uint128_t>(low) | + (static_cast<uint128_t>(high) << 64)} {} + + uint128_wrapper(uint128_t u) : internal_{u} {} + + uint64_t high() const FMT_NOEXCEPT { return uint64_t(internal_ >> 64); } + uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); } + + uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { + internal_ += n; + return *this; + } +#else + uint64_t high_; + uint64_t low_; + + uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : high_{high}, + low_{low} {} + + uint64_t high() const FMT_NOEXCEPT { return high_; } + uint64_t low() const FMT_NOEXCEPT { return low_; } + + uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { +# if defined(_MSC_VER) && defined(_M_X64) + unsigned char carry = _addcarry_u64(0, low_, n, &low_); + _addcarry_u64(carry, high_, 0, &high_); + 
return *this; +# else + uint64_t sum = low_ + n; + high_ += (sum < low_ ? 1 : 0); + low_ = sum; + return *this; +# endif + } +#endif +}; + +// Table entry type for divisibility test used internally +template <typename T> struct FMT_EXTERN_TEMPLATE_API divtest_table_entry { + T mod_inv; + T max_quotient; +}; + // Static data is placed in this class template for the header-only config. template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data { static const uint64_t powers_of_10_64[]; - static const uint32_t zero_or_powers_of_10_32[]; - static const uint64_t zero_or_powers_of_10_64[]; - static const uint64_t pow10_significands[]; - static const int16_t pow10_exponents[]; + static const uint32_t zero_or_powers_of_10_32_new[]; + static const uint64_t zero_or_powers_of_10_64_new[]; + static const uint64_t grisu_pow10_significands[]; + static const int16_t grisu_pow10_exponents[]; + static const divtest_table_entry<uint32_t> divtest_table_for_pow5_32[]; + static const divtest_table_entry<uint64_t> divtest_table_for_pow5_64[]; + static const uint64_t dragonbox_pow10_significands_64[]; + static const uint128_wrapper dragonbox_pow10_significands_128[]; + // log10(2) = 0x0.4d104d427de7fbcc... + static const uint64_t log10_2_significand = 0x4d104d427de7fbcc; +#if !FMT_USE_FULL_CACHE_DRAGONBOX + static const uint64_t powers_of_5_64[]; + static const uint32_t dragonbox_pow10_recovery_errors[]; +#endif // GCC generates slightly better code for pairs than chars. using digit_pair = char[2]; static const digit_pair digits[]; @@ -772,8 +891,23 @@ template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data { static const char signs[]; static const char left_padding_shifts[5]; static const char right_padding_shifts[5]; + + // DEPRECATED! These are for ABI compatibility. + static const uint32_t zero_or_powers_of_10_32[]; + static const uint64_t zero_or_powers_of_10_64[]; }; +// Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). 
+// This is a function instead of an array to workaround a bug in GCC10 (#1810). +FMT_INLINE uint16_t bsr2log10(int bsr) { + static constexpr uint16_t data[] = { + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, + 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; + return data[bsr]; +} + #ifndef FMT_EXPORTED FMT_EXTERN template struct basic_data<void>; #endif @@ -785,10 +919,9 @@ struct data : basic_data<> {}; // Returns the number of decimal digits in n. Leading zeros are not counted // except for n == 0 in which case count_digits returns 1. inline int count_digits(uint64_t n) { - // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 - // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. - int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_64[t]) + 1; + // https://github.com/fmtlib/format-benchmark/blob/master/digits10 + auto t = bsr2log10(FMT_BUILTIN_CLZLL(n | 1) ^ 63); + return t - (n < data::zero_or_powers_of_10_64_new[t]); } #else // Fallback version of count_digits used when __builtin_clz is not available. @@ -838,15 +971,24 @@ template <> int count_digits<4>(detail::fallback_uintptr n); #if FMT_GCC_VERSION || FMT_CLANG_VERSION # define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) +#elif FMT_MSC_VER +# define FMT_ALWAYS_INLINE __forceinline #else -# define FMT_ALWAYS_INLINE +# define FMT_ALWAYS_INLINE inline +#endif + +// To suppress unnecessary security cookie checks +#if FMT_MSC_VER && !FMT_CLANG_VERSION +# define FMT_SAFEBUFFERS __declspec(safebuffers) +#else +# define FMT_SAFEBUFFERS #endif #ifdef FMT_BUILTIN_CLZ // Optional version of count_digits for better performance on 32-bit platforms. 
inline int count_digits(uint32_t n) { - int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_32[t]) + 1; + auto t = bsr2log10(FMT_BUILTIN_CLZ(n | 1) ^ 31); + return t - (n < data::zero_or_powers_of_10_32_new[t]); } #endif @@ -893,7 +1035,7 @@ template <typename Char> void copy2(Char* dst, const char* src) { *dst++ = static_cast<Char>(*src++); *dst = static_cast<Char>(*src); } -inline void copy2(char* dst, const char* src) { memcpy(dst, src, 2); } +FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); } template <typename Iterator> struct format_decimal_result { Iterator begin; @@ -929,11 +1071,10 @@ inline format_decimal_result<Char*> format_decimal(Char* out, UInt value, template <typename Char, typename UInt, typename Iterator, FMT_ENABLE_IF(!std::is_pointer<remove_cvref_t<Iterator>>::value)> inline format_decimal_result<Iterator> format_decimal(Iterator out, UInt value, - int num_digits) { - // Buffer should be large enough to hold all digits (<= digits10 + 1). - enum { max_size = digits10<UInt>() + 1 }; - Char buffer[2 * max_size]; - auto end = format_decimal(buffer, value, num_digits).end; + int size) { + // Buffer is large enough to hold all digits (digits10 + 1). + Char buffer[digits10<UInt>() + 1]; + auto end = format_decimal(buffer, value, size).end; return {out, detail::copy_str<Char>(buffer, end, out)}; } @@ -975,6 +1116,10 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits, template <unsigned BASE_BITS, typename Char, typename It, typename UInt> inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { + if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) { + format_uint<BASE_BITS>(ptr, value, num_digits, upper); + return out; + } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). 
char buffer[num_bits<UInt>() / BASE_BITS + 1]; format_uint<BASE_BITS>(buffer, value, num_digits, upper); @@ -1000,8 +1145,8 @@ template <typename T = void> struct null {}; template <typename Char> struct fill_t { private: enum { max_size = 4 }; - Char data_[max_size]; - unsigned char size_; + Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)}; + unsigned char size_ = 1; public: FMT_CONSTEXPR void operator=(basic_string_view<Char> s) { @@ -1021,13 +1166,6 @@ template <typename Char> struct fill_t { FMT_CONSTEXPR const Char& operator[](size_t index) const { return data_[index]; } - - static FMT_CONSTEXPR fill_t<Char> make() { - auto fill = fill_t<Char>(); - fill[0] = Char(' '); - fill.size_ = 1; - return fill; - } }; } // namespace detail @@ -1059,13 +1197,84 @@ template <typename Char> struct basic_format_specs { type(0), align(align::none), sign(sign::none), - alt(false), - fill(detail::fill_t<Char>::make()) {} + alt(false) {} }; using format_specs = basic_format_specs<char>; namespace detail { +namespace dragonbox { + +// Type-specific information that Dragonbox uses. 
+template <class T> struct float_info; + +template <> struct float_info<float> { + using carrier_uint = uint32_t; + static const int significand_bits = 23; + static const int exponent_bits = 8; + static const int min_exponent = -126; + static const int max_exponent = 127; + static const int exponent_bias = -127; + static const int decimal_digits = 9; + static const int kappa = 1; + static const int big_divisor = 100; + static const int small_divisor = 10; + static const int min_k = -31; + static const int max_k = 46; + static const int cache_bits = 64; + static const int divisibility_check_by_5_threshold = 39; + static const int case_fc_pm_half_lower_threshold = -1; + static const int case_fc_pm_half_upper_threshold = 6; + static const int case_fc_lower_threshold = -2; + static const int case_fc_upper_threshold = 6; + static const int case_shorter_interval_left_endpoint_lower_threshold = 2; + static const int case_shorter_interval_left_endpoint_upper_threshold = 3; + static const int shorter_interval_tie_lower_threshold = -35; + static const int shorter_interval_tie_upper_threshold = -35; + static const int max_trailing_zeros = 7; +}; + +template <> struct float_info<double> { + using carrier_uint = uint64_t; + static const int significand_bits = 52; + static const int exponent_bits = 11; + static const int min_exponent = -1022; + static const int max_exponent = 1023; + static const int exponent_bias = -1023; + static const int decimal_digits = 17; + static const int kappa = 2; + static const int big_divisor = 1000; + static const int small_divisor = 100; + static const int min_k = -292; + static const int max_k = 326; + static const int cache_bits = 128; + static const int divisibility_check_by_5_threshold = 86; + static const int case_fc_pm_half_lower_threshold = -2; + static const int case_fc_pm_half_upper_threshold = 9; + static const int case_fc_lower_threshold = -4; + static const int case_fc_upper_threshold = 9; + static const int 
case_shorter_interval_left_endpoint_lower_threshold = 2; + static const int case_shorter_interval_left_endpoint_upper_threshold = 3; + static const int shorter_interval_tie_lower_threshold = -77; + static const int shorter_interval_tie_upper_threshold = -77; + static const int max_trailing_zeros = 16; +}; + +template <typename T> struct decimal_fp { + using significand_type = typename float_info<T>::carrier_uint; + significand_type significand; + int exponent; +}; + +template <typename T> FMT_API decimal_fp<T> to_decimal(T x) FMT_NOEXCEPT; +} // namespace dragonbox + +template <typename T> +constexpr typename dragonbox::float_info<T>::carrier_uint exponent_mask() { + using uint = typename dragonbox::float_info<T>::carrier_uint; + return ((uint(1) << dragonbox::float_info<T>::exponent_bits) - 1) + << dragonbox::float_info<T>::significand_bits; +} // A floating-point presentation format. enum class float_format : unsigned char { @@ -1107,113 +1316,6 @@ template <typename Char, typename It> It write_exponent(int exp, It it) { return it; } -template <typename Char> class float_writer { - private: - // The number is given as v = digits_ * pow(10, exp_). - const char* digits_; - int num_digits_; - int exp_; - size_t size_; - float_specs specs_; - Char decimal_point_; - - template <typename It> It prettify(It it) const { - // pow(10, full_exp - 1) <= v <= pow(10, full_exp). - int full_exp = num_digits_ + exp_; - if (specs_.format == float_format::exp) { - // Insert a decimal point after the first digit and add an exponent. - *it++ = static_cast<Char>(*digits_); - int num_zeros = specs_.precision - num_digits_; - if (num_digits_ > 1 || specs_.showpoint) *it++ = decimal_point_; - it = copy_str<Char>(digits_ + 1, digits_ + num_digits_, it); - if (num_zeros > 0 && specs_.showpoint) - it = std::fill_n(it, num_zeros, static_cast<Char>('0')); - *it++ = static_cast<Char>(specs_.upper ? 
'E' : 'e'); - return write_exponent<Char>(full_exp - 1, it); - } - if (num_digits_ <= full_exp) { - // 1234e7 -> 12340000000[.0+] - it = copy_str<Char>(digits_, digits_ + num_digits_, it); - it = std::fill_n(it, full_exp - num_digits_, static_cast<Char>('0')); - if (specs_.showpoint || specs_.precision < 0) { - *it++ = decimal_point_; - int num_zeros = specs_.precision - full_exp; - if (num_zeros <= 0) { - if (specs_.format != float_format::fixed) - *it++ = static_cast<Char>('0'); - return it; - } -#ifdef FMT_FUZZ - if (num_zeros > 5000) - throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); -#endif - it = std::fill_n(it, num_zeros, static_cast<Char>('0')); - } - } else if (full_exp > 0) { - // 1234e-2 -> 12.34[0+] - it = copy_str<Char>(digits_, digits_ + full_exp, it); - if (!specs_.showpoint) { - // Remove trailing zeros. - int num_digits = num_digits_; - while (num_digits > full_exp && digits_[num_digits - 1] == '0') - --num_digits; - if (num_digits != full_exp) *it++ = decimal_point_; - return copy_str<Char>(digits_ + full_exp, digits_ + num_digits, it); - } - *it++ = decimal_point_; - it = copy_str<Char>(digits_ + full_exp, digits_ + num_digits_, it); - if (specs_.precision > num_digits_) { - // Add trailing zeros. - int num_zeros = specs_.precision - num_digits_; - it = std::fill_n(it, num_zeros, static_cast<Char>('0')); - } - } else { - // 1234e-6 -> 0.001234 - *it++ = static_cast<Char>('0'); - int num_zeros = -full_exp; - int num_digits = num_digits_; - if (num_digits == 0 && specs_.precision >= 0 && - specs_.precision < num_zeros) { - num_zeros = specs_.precision; - } - // Remove trailing zeros. 
- if (!specs_.showpoint) - while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits; - if (num_zeros != 0 || num_digits != 0 || specs_.showpoint) { - *it++ = decimal_point_; - it = std::fill_n(it, num_zeros, static_cast<Char>('0')); - it = copy_str<Char>(digits_, digits_ + num_digits, it); - } - } - return it; - } - - public: - float_writer(const char* digits, int num_digits, int exp, float_specs specs, - Char decimal_point) - : digits_(digits), - num_digits_(num_digits), - exp_(exp), - specs_(specs), - decimal_point_(decimal_point) { - int full_exp = num_digits + exp - 1; - int precision = specs.precision > 0 ? specs.precision : 16; - if (specs_.format == float_format::general && - !(full_exp >= -4 && full_exp < precision)) { - specs_.format = float_format::exp; - } - size_ = prettify(counting_iterator()).count(); - size_ += specs.sign ? 1 : 0; - } - - size_t size() const { return size_; } - - template <typename It> It operator()(It it) const { - if (specs_.sign) *it++ = static_cast<Char>(data::signs[specs_.sign]); - return prettify(it); - } -}; - template <typename T> int format_float(T value, int precision, float_specs specs, buffer<char>& buf); @@ -1392,7 +1494,7 @@ template <align::type align = align::left, typename OutputIt, typename Char, typename F> inline OutputIt write_padded(OutputIt out, const basic_format_specs<Char>& specs, size_t size, - size_t width, const F& f) { + size_t width, F&& f) { static_assert(align == align::left || align == align::right, ""); unsigned spec_width = to_unsigned(specs.width); size_t padding = spec_width > width ? 
spec_width - width : 0; @@ -1410,7 +1512,7 @@ template <align::type align = align::left, typename OutputIt, typename Char, typename F> inline OutputIt write_padded(OutputIt out, const basic_format_specs<Char>& specs, size_t size, - const F& f) { + F&& f) { return write_padded<align>(out, specs, size, size, f); } @@ -1577,15 +1679,16 @@ template <typename OutputIt, typename Char, typename UInt> struct int_writer { char digits[40]; format_decimal(digits, abs_value, num_digits); basic_memory_buffer<Char> buffer; - size += prefix_size; - buffer.resize(size); + size += static_cast<int>(prefix_size); + const auto usize = to_unsigned(size); + buffer.resize(usize); basic_string_view<Char> s(&sep, sep_size); // Index of a decimal digit with the least significant digit having index 0. int digit_index = 0; group = groups.cbegin(); - auto p = buffer.data() + size; - for (int i = num_digits - 1; i >= 0; --i) { - *--p = static_cast<Char>(digits[i]); + auto p = buffer.data() + size - 1; + for (int i = num_digits - 1; i > 0; --i) { + *p-- = static_cast<Char>(digits[i]); if (*group <= 0 || ++digit_index % *group != 0 || *group == max_value<char>()) continue; @@ -1593,16 +1696,16 @@ template <typename OutputIt, typename Char, typename UInt> struct int_writer { digit_index = 0; ++group; } - p -= s.size(); std::uninitialized_copy(s.data(), s.data() + s.size(), make_checked(p, s.size())); + p -= s.size(); } - if (prefix_size != 0) p[-1] = static_cast<Char>('-'); - using iterator = remove_reference_t<decltype(reserve(out, 0))>; + *p-- = static_cast<Char>(*digits); + if (prefix_size != 0) *p = static_cast<Char>('-'); auto data = buffer.data(); - out = write_padded<align::right>(out, specs, size, size, [=](iterator it) { - return copy_str<Char>(data, data + size, it); - }); + out = write_padded<align::right>( + out, specs, usize, usize, + [=](iterator it) { return copy_str<Char>(data, data + size, it); }); } void on_chr() { *out++ = static_cast<Char>(abs_value); } @@ -1628,6 +1731,168 @@ 
OutputIt write_nonfinite(OutputIt out, bool isinf, }); } +// A decimal floating-point number significand * pow(10, exp). +struct big_decimal_fp { + const char* significand; + int significand_size; + int exponent; +}; + +inline int get_significand_size(const big_decimal_fp& fp) { + return fp.significand_size; +} +template <typename T> +inline int get_significand_size(const dragonbox::decimal_fp<T>& fp) { + return count_digits(fp.significand); +} + +template <typename Char, typename OutputIt> +inline OutputIt write_significand(OutputIt out, const char* significand, + int& significand_size) { + return copy_str<Char>(significand, significand + significand_size, out); +} +template <typename Char, typename OutputIt, typename UInt> +inline OutputIt write_significand(OutputIt out, UInt significand, + int significand_size) { + return format_decimal<Char>(out, significand, significand_size).end; +} + +template <typename Char, typename UInt, + FMT_ENABLE_IF(std::is_integral<UInt>::value)> +inline Char* write_significand(Char* out, UInt significand, + int significand_size, int integral_size, + Char decimal_point) { + if (!decimal_point) + return format_decimal(out, significand, significand_size).end; + auto end = format_decimal(out + 1, significand, significand_size).end; + if (integral_size == 1) + out[0] = out[1]; + else + std::copy_n(out + 1, integral_size, out); + out[integral_size] = decimal_point; + return end; +} + +template <typename OutputIt, typename UInt, typename Char, + FMT_ENABLE_IF(!std::is_pointer<remove_cvref_t<OutputIt>>::value)> +inline OutputIt write_significand(OutputIt out, UInt significand, + int significand_size, int integral_size, + Char decimal_point) { + // Buffer is large enough to hold digits (digits10 + 1) and a decimal point. 
+ Char buffer[digits10<UInt>() + 2]; + auto end = write_significand(buffer, significand, significand_size, + integral_size, decimal_point); + return detail::copy_str<Char>(buffer, end, out); +} + +template <typename OutputIt, typename Char> +inline OutputIt write_significand(OutputIt out, const char* significand, + int significand_size, int integral_size, + Char decimal_point) { + out = detail::copy_str<Char>(significand, significand + integral_size, out); + if (!decimal_point) return out; + *out++ = decimal_point; + return detail::copy_str<Char>(significand + integral_size, + significand + significand_size, out); +} + +template <typename OutputIt, typename DecimalFP, typename Char> +OutputIt write_float(OutputIt out, const DecimalFP& fp, + const basic_format_specs<Char>& specs, float_specs fspecs, + Char decimal_point) { + auto significand = fp.significand; + int significand_size = get_significand_size(fp); + static const Char zero = static_cast<Char>('0'); + auto sign = fspecs.sign; + size_t size = to_unsigned(significand_size) + (sign ? 1 : 0); + using iterator = remove_reference_t<decltype(reserve(out, 0))>; + + int output_exp = fp.exponent + significand_size - 1; + auto use_exp_format = [=]() { + if (fspecs.format == float_format::exp) return true; + if (fspecs.format != float_format::general) return false; + // Use the fixed notation if the exponent is in [exp_lower, exp_upper), + // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation. + const int exp_lower = -4, exp_upper = 16; + return output_exp < exp_lower || + output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper); + }; + if (use_exp_format()) { + int num_zeros = 0; + if (fspecs.showpoint) { + num_zeros = (std::max)(fspecs.precision - significand_size, 0); + size += to_unsigned(num_zeros); + } else if (significand_size == 1) { + decimal_point = Char(); + } + auto abs_output_exp = output_exp >= 0 ? 
output_exp : -output_exp; + int exp_digits = 2; + if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3; + + size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits); + char exp_char = fspecs.upper ? 'E' : 'e'; + auto write = [=](iterator it) { + if (sign) *it++ = static_cast<Char>(data::signs[sign]); + // Insert a decimal point after the first digit and add an exponent. + it = write_significand(it, significand, significand_size, 1, + decimal_point); + if (num_zeros > 0) it = std::fill_n(it, num_zeros, zero); + *it++ = static_cast<Char>(exp_char); + return write_exponent<Char>(output_exp, it); + }; + return specs.width > 0 ? write_padded<align::right>(out, specs, size, write) + : base_iterator(out, write(reserve(out, size))); + } + + int exp = fp.exponent + significand_size; + if (fp.exponent >= 0) { + // 1234e5 -> 123400000[.0+] + size += to_unsigned(fp.exponent); + int num_zeros = fspecs.precision - exp; +#ifdef FMT_FUZZ + if (num_zeros > 5000) + throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); +#endif + if (fspecs.showpoint) { + if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; + if (num_zeros > 0) size += to_unsigned(num_zeros); + } + return write_padded<align::right>(out, specs, size, [&](iterator it) { + if (sign) *it++ = static_cast<Char>(data::signs[sign]); + it = write_significand<Char>(it, significand, significand_size); + it = std::fill_n(it, fp.exponent, zero); + if (!fspecs.showpoint) return it; + *it++ = decimal_point; + return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it; + }); + } else if (exp > 0) { + // 1234e-2 -> 12.34[0+] + int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0; + size += 1 + to_unsigned(num_zeros > 0 ? 
num_zeros : 0); + return write_padded<align::right>(out, specs, size, [&](iterator it) { + if (sign) *it++ = static_cast<Char>(data::signs[sign]); + it = write_significand(it, significand, significand_size, exp, + decimal_point); + return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it; + }); + } + // 1234e-6 -> 0.001234 + int num_zeros = -exp; + if (significand_size == 0 && fspecs.precision >= 0 && + fspecs.precision < num_zeros) { + num_zeros = fspecs.precision; + } + size += 2 + to_unsigned(num_zeros); + return write_padded<align::right>(out, specs, size, [&](iterator it) { + if (sign) *it++ = static_cast<Char>(data::signs[sign]); + *it++ = zero; + if (num_zeros == 0 && significand_size == 0 && !fspecs.showpoint) return it; + *it++ = decimal_point; + it = std::fill_n(it, num_zeros, zero); + return write_significand<Char>(it, significand, significand_size); + }); +} + template <typename Char, typename OutputIt, typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)> OutputIt write(OutputIt out, T value, basic_format_specs<Char> specs, @@ -1667,39 +1932,45 @@ OutputIt write(OutputIt out, T value, basic_format_specs<Char> specs, ++precision; } if (const_check(std::is_same<T, float>())) fspecs.binary32 = true; - fspecs.use_grisu = use_grisu<T>(); + fspecs.use_grisu = is_fast_float<T>(); int exp = format_float(promote_float(value), precision, fspecs, buffer); fspecs.precision = precision; Char point = fspecs.locale ? 
decimal_point<Char>(loc) : static_cast<Char>('.'); - float_writer<Char> w(buffer.data(), static_cast<int>(buffer.size()), exp, - fspecs, point); - return write_padded<align::right>(out, specs, w.size(), w); + auto fp = big_decimal_fp{buffer.data(), static_cast<int>(buffer.size()), exp}; + return write_float(out, fp, specs, fspecs, point); } template <typename Char, typename OutputIt, typename T, - FMT_ENABLE_IF(std::is_floating_point<T>::value)> + FMT_ENABLE_IF(is_fast_float<T>::value)> OutputIt write(OutputIt out, T value) { if (const_check(!is_supported_floating_point(value))) return out; + + using floaty = conditional_t<std::is_same<T, long double>::value, double, T>; + using uint = typename dragonbox::float_info<floaty>::carrier_uint; + auto bits = bit_cast<uint>(value); + auto fspecs = float_specs(); - if (std::signbit(value)) { // value < 0 is false for NaN so use signbit. + auto sign_bit = bits & (uint(1) << (num_bits<uint>() - 1)); + if (sign_bit != 0) { fspecs.sign = sign::minus; value = -value; } - auto specs = basic_format_specs<Char>(); - if (!std::isfinite(value)) + static const auto specs = basic_format_specs<Char>(); + uint mask = exponent_mask<floaty>(); + if ((bits & mask) == mask) return write_nonfinite(out, std::isinf(value), specs, fspecs); - memory_buffer buffer; - int precision = -1; - if (const_check(std::is_same<T, float>())) fspecs.binary32 = true; - fspecs.use_grisu = use_grisu<T>(); - int exp = format_float(promote_float(value), precision, fspecs, buffer); - fspecs.precision = precision; - float_writer<Char> w(buffer.data(), static_cast<int>(buffer.size()), exp, - fspecs, static_cast<Char>('.')); - return base_iterator(out, w(reserve(out, w.size()))); + auto dec = dragonbox::to_decimal(static_cast<floaty>(value)); + return write_float(out, dec, specs, fspecs, static_cast<Char>('.')); +} + +template <typename Char, typename OutputIt, typename T, + FMT_ENABLE_IF(std::is_floating_point<T>::value && + !is_fast_float<T>::value)> +inline 
OutputIt write(OutputIt out, T value) { + return write(out, value, basic_format_specs<Char>()); } template <typename Char, typename OutputIt> @@ -1752,6 +2023,13 @@ OutputIt write(OutputIt out, basic_string_view<Char> value) { return base_iterator(out, it); } +template <typename Char> +buffer_appender<Char> write(buffer_appender<Char> out, + basic_string_view<Char> value) { + get_container(out).append(value.begin(), value.end()); + return out; +} + template <typename Char, typename OutputIt, typename T, FMT_ENABLE_IF(is_integral<T>::value && !std::is_same<T, bool>::value && @@ -1762,7 +2040,13 @@ OutputIt write(OutputIt out, T value) { // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. if (negative) abs_value = ~abs_value + 1; int num_digits = count_digits(abs_value); - auto it = reserve(out, (negative ? 1 : 0) + static_cast<size_t>(num_digits)); + auto size = (negative ? 1 : 0) + static_cast<size_t>(num_digits); + auto it = reserve(out, size); + if (auto ptr = to_pointer<Char>(it, size)) { + if (negative) *ptr++ = static_cast<Char>('-'); + format_decimal<Char>(ptr, abs_value, num_digits); + return out; + } if (negative) *it++ = static_cast<Char>('-'); it = format_decimal<Char>(it, abs_value, num_digits).end; return base_iterator(out, it); @@ -1801,8 +2085,13 @@ auto write(OutputIt out, const T& value) -> typename std::enable_if< mapped_type_constant<T, basic_format_context<OutputIt, Char>>::value == type::custom_type, OutputIt>::type { - basic_format_context<OutputIt, Char> ctx(out, {}, {}); - return formatter<T>().format(value, ctx); + using context_type = basic_format_context<OutputIt, Char>; + using formatter_type = + conditional_t<has_formatter<T, context_type>::value, + typename context_type::template formatter_type<T>, + fallback_formatter<T, Char>>; + context_type ctx(out, {}, {}); + return formatter_type().format(value, ctx); } // An argument visitor that formats the argument and writes it via the output @@ -2008,6 +2297,48 @@ class 
arg_formatter_base { } }; +/** The default argument formatter. */ +template <typename OutputIt, typename Char> +class arg_formatter : public arg_formatter_base<OutputIt, Char> { + private: + using char_type = Char; + using base = arg_formatter_base<OutputIt, Char>; + using context_type = basic_format_context<OutputIt, Char>; + + context_type& ctx_; + basic_format_parse_context<char_type>* parse_ctx_; + const Char* ptr_; + + public: + using iterator = typename base::iterator; + using format_specs = typename base::format_specs; + + /** + \rst + Constructs an argument formatter object. + *ctx* is a reference to the formatting context, + *specs* contains format specifier information for standard argument types. + \endrst + */ + explicit arg_formatter( + context_type& ctx, + basic_format_parse_context<char_type>* parse_ctx = nullptr, + format_specs* specs = nullptr, const Char* ptr = nullptr) + : base(ctx.out(), specs, ctx.locale()), + ctx_(ctx), + parse_ctx_(parse_ctx), + ptr_(ptr) {} + + using base::operator(); + + /** Formats an argument of a user-defined type. 
*/ + iterator operator()(typename basic_format_arg<context_type>::handle handle) { + if (ptr_) advance_to(*parse_ctx_, ptr_); + handle.format(*parse_ctx_, ctx_); + return ctx_.out(); + } +}; + template <typename Char> FMT_CONSTEXPR bool is_name_start(Char c) { return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; } @@ -2047,12 +2378,11 @@ template <typename Context> class custom_formatter { Context& ctx) : parse_ctx_(parse_ctx), ctx_(ctx) {} - bool operator()(typename basic_format_arg<Context>::handle h) const { + void operator()(typename basic_format_arg<Context>::handle h) const { h.format(parse_ctx_, ctx_); - return true; } - template <typename T> bool operator()(T) const { return false; } + template <typename T> void operator()(T) const {} }; template <typename T> @@ -2434,12 +2764,30 @@ template <typename SpecHandler, typename Char> struct precision_adapter { }; template <typename Char> -FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) { - if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1; - do { - ++begin; - } while (begin != end && (*begin & 0xc0) == 0x80); - return begin; +FMT_CONSTEXPR int code_point_length(const Char* begin) { + if (const_check(sizeof(Char) != 1)) return 1; + constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; + int len = lengths[static_cast<unsigned char>(*begin) >> 3]; + + // Compute the pointer to the next character early so that the next + // iteration can start working on the next character. Neither Clang + // nor GCC figure out this reordering on their own. + return len + !len; +} + +template <typename Char> constexpr bool is_ascii_letter(Char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +// Converts a character to ASCII. Returns a number > 127 on conversion failure. 
+template <typename Char, FMT_ENABLE_IF(std::is_integral<Char>::value)> +constexpr Char to_ascii(Char value) { + return value; +} +template <typename Char, FMT_ENABLE_IF(std::is_enum<Char>::value)> +constexpr typename std::underlying_type<Char>::type to_ascii(Char value) { + return value; } // Parses fill and alignment. @@ -2448,10 +2796,10 @@ FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end, Handler&& handler) { FMT_ASSERT(begin != end, ""); auto align = align::none; - auto p = next_code_point(begin, end); - if (p == end) p = begin; + auto p = begin + code_point_length(begin); + if (p >= end) p = begin; for (;;) { - switch (static_cast<char>(*p)) { + switch (to_ascii(*p)) { case '<': align = align::left; break; @@ -2530,13 +2878,13 @@ FMT_CONSTEXPR const Char* parse_precision(const Char* begin, const Char* end, template <typename Char, typename SpecHandler> FMT_CONSTEXPR const Char* parse_format_specs(const Char* begin, const Char* end, SpecHandler&& handler) { - if (begin == end || *begin == '}') return begin; + if (begin == end) return begin; begin = parse_align(begin, end, handler); if (begin == end) return begin; // Parse sign. 
- switch (static_cast<char>(*begin)) { + switch (to_ascii(*begin)) { case '+': handler.on_plus(); ++begin; @@ -2613,7 +2961,7 @@ FMT_CONSTEXPR const Char* parse_replacement_field(const Char* begin, Handler&& handler) { ++begin; if (begin == end) return handler.on_error("invalid format string"), end; - if (static_cast<char>(*begin) == '}') { + if (*begin == '}') { handler.on_replacement_field(handler.on_arg_id(), begin); } else if (*begin == '{') { handler.on_text(begin, begin + 1); @@ -2658,17 +3006,17 @@ FMT_CONSTEXPR_DECL FMT_INLINE void parse_format_string( return; } struct writer { - FMT_CONSTEXPR void operator()(const Char* begin, const Char* end) { - if (begin == end) return; + FMT_CONSTEXPR void operator()(const Char* pbegin, const Char* pend) { + if (pbegin == pend) return; for (;;) { const Char* p = nullptr; - if (!find<IS_CONSTEXPR>(begin, end, '}', p)) - return handler_.on_text(begin, end); + if (!find<IS_CONSTEXPR>(pbegin, pend, '}', p)) + return handler_.on_text(pbegin, pend); ++p; - if (p == end || *p != '}') + if (p == pend || *p != '}') return handler_.on_error("unmatched '}' in format string"); - handler_.on_text(begin, p); - begin = p + 1; + handler_.on_text(pbegin, p); + pbegin = p + 1; } } Handler& handler_; @@ -2699,13 +3047,12 @@ FMT_CONSTEXPR const typename ParseContext::char_type* parse_format_specs( return f.parse(ctx); } -template <typename ArgFormatter, typename Char, typename Context> +template <typename OutputIt, typename Char, typename Context> struct format_handler : detail::error_handler { basic_format_parse_context<Char> parse_context; Context context; - format_handler(typename ArgFormatter::iterator out, - basic_string_view<Char> str, + format_handler(OutputIt out, basic_string_view<Char> str, basic_format_args<Context> format_args, detail::locale_ref loc) : parse_context(str), context(out, format_args, loc) {} @@ -2728,26 +3075,33 @@ struct format_handler : detail::error_handler { FMT_INLINE void on_replacement_field(int id, const 
Char*) { auto arg = get_arg(context, id); context.advance_to(visit_format_arg( - default_arg_formatter<typename ArgFormatter::iterator, Char>{ - context.out(), context.args(), context.locale()}, + default_arg_formatter<OutputIt, Char>{context.out(), context.args(), + context.locale()}, arg)); } const Char* on_format_specs(int id, const Char* begin, const Char* end) { - advance_to(parse_context, begin); auto arg = get_arg(context, id); - custom_formatter<Context> f(parse_context, context); - if (visit_format_arg(f, arg)) return parse_context.begin(); - basic_format_specs<Char> specs; - using parse_context_t = basic_format_parse_context<Char>; - specs_checker<specs_handler<parse_context_t, Context>> handler( - specs_handler<parse_context_t, Context>(specs, parse_context, context), - arg.type()); - begin = parse_format_specs(begin, end, handler); - if (begin == end || *begin != '}') on_error("missing '}' in format string"); - advance_to(parse_context, begin); - context.advance_to( - visit_format_arg(ArgFormatter(context, &parse_context, &specs), arg)); + if (arg.type() == type::custom_type) { + advance_to(parse_context, begin); + visit_format_arg(custom_formatter<Context>(parse_context, context), arg); + return parse_context.begin(); + } + auto specs = basic_format_specs<Char>(); + if (begin + 1 < end && begin[1] == '}' && is_ascii_letter(*begin)) { + specs.type = static_cast<char>(*begin++); + } else { + using parse_context_t = basic_format_parse_context<Char>; + specs_checker<specs_handler<parse_context_t, Context>> handler( + specs_handler<parse_context_t, Context>(specs, parse_context, + context), + arg.type()); + begin = parse_format_specs(begin, end, handler); + if (begin == end || *begin != '}') + on_error("missing '}' in format string"); + } + context.advance_to(visit_format_arg( + arg_formatter<OutputIt, Char>(context, &parse_context, &specs), arg)); return begin; } }; @@ -2899,53 +3253,11 @@ FMT_API void format_error_code(buffer<char>& out, int error_code, 
FMT_API void report_error(format_func func, int error_code, string_view message) FMT_NOEXCEPT; - -/** The default argument formatter. */ -template <typename OutputIt, typename Char> -class arg_formatter : public arg_formatter_base<OutputIt, Char> { - private: - using char_type = Char; - using base = arg_formatter_base<OutputIt, Char>; - using context_type = basic_format_context<OutputIt, Char>; - - context_type& ctx_; - basic_format_parse_context<char_type>* parse_ctx_; - const Char* ptr_; - - public: - using iterator = typename base::iterator; - using format_specs = typename base::format_specs; - - /** - \rst - Constructs an argument formatter object. - *ctx* is a reference to the formatting context, - *specs* contains format specifier information for standard argument types. - \endrst - */ - explicit arg_formatter( - context_type& ctx, - basic_format_parse_context<char_type>* parse_ctx = nullptr, - format_specs* specs = nullptr, const Char* ptr = nullptr) - : base(ctx.out(), specs, ctx.locale()), - ctx_(ctx), - parse_ctx_(parse_ctx), - ptr_(ptr) {} - - using base::operator(); - - /** Formats an argument of a user-defined type. 
*/ - iterator operator()(typename basic_format_arg<context_type>::handle handle) { - if (ptr_) advance_to(*parse_ctx_, ptr_); - handle.format(*parse_ctx_, ctx_); - return ctx_.out(); - } -}; } // namespace detail template <typename OutputIt, typename Char> using arg_formatter FMT_DEPRECATED_ALIAS = - detail::arg_formatter<OutputIt, Char>; + detail::arg_formatter<OutputIt, Char>; /** An error returned by an operating system or a language runtime, @@ -3208,8 +3520,10 @@ struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> { // using variant = std::variant<int, std::string>; // template <> // struct formatter<variant>: dynamic_formatter<> { -// void format(buffer &buf, const variant &v, context &ctx) { -// visit([&](const auto &val) { format(buf, val, ctx); }, v); +// auto format(const variant& v, format_context& ctx) { +// return visit([&](const auto& val) { +// return dynamic_formatter<>::format(val, ctx); +// }, v); // } // }; template <typename Char = char> class dynamic_formatter { @@ -3277,28 +3591,15 @@ FMT_CONSTEXPR void advance_to( ctx.advance_to(ctx.begin() + (p - &*ctx.begin())); } -/** Formats arguments and writes the output to the range. 
*/ -template <typename ArgFormatter, typename Char, typename Context> -typename Context::iterator vformat_to( - typename ArgFormatter::iterator out, basic_string_view<Char> format_str, - basic_format_args<Context> args, - detail::locale_ref loc = detail::locale_ref()) { - if (format_str.size() == 2 && detail::equal2(format_str.data(), "{}")) { - auto arg = args.get(0); - if (!arg) detail::error_handler().on_error("argument not found"); - using iterator = typename ArgFormatter::iterator; - return visit_format_arg( - detail::default_arg_formatter<iterator, Char>{out, args, loc}, arg); - } - detail::format_handler<ArgFormatter, Char, Context> h(out, format_str, args, - loc); - detail::parse_format_string<false>(format_str, h); - return h.context.out(); -} +/** + \rst + Converts ``p`` to ``const void*`` for pointer formatting. -// Casts ``p`` to ``const void*`` for pointer formatting. -// Example: -// auto s = format("{}", ptr(p)); + **Example**:: + + auto s = fmt::format("{}", fmt::ptr(p)); + \endrst + */ template <typename T> inline const void* ptr(const T* p) { return p; } template <typename T> inline const void* ptr(const std::unique_ptr<T>& p) { return p.get(); @@ -3317,6 +3618,10 @@ class bytes { }; template <> struct formatter<bytes> { + private: + detail::dynamic_format_specs<char> specs_; + + public: template <typename ParseContext> FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { using handler_type = detail::dynamic_specs_handler<ParseContext>; @@ -3335,9 +3640,6 @@ template <> struct formatter<bytes> { specs_.precision, specs_.precision_ref, ctx); return detail::write_bytes(ctx.out(), b.data_, specs_); } - - private: - detail::dynamic_format_specs<char> specs_; }; template <typename It, typename Sentinel, typename Char> @@ -3402,15 +3704,14 @@ arg_join<It, Sentinel, wchar_t> join(It begin, Sentinel end, wstring_view sep) { \endrst */ template <typename Range> -arg_join<detail::iterator_t<const Range>, detail::sentinel_t<const Range>, 
char> -join(const Range& range, string_view sep) { +arg_join<detail::iterator_t<Range>, detail::sentinel_t<Range>, char> join( + Range&& range, string_view sep) { return join(std::begin(range), std::end(range), sep); } template <typename Range> -arg_join<detail::iterator_t<const Range>, detail::sentinel_t<const Range>, - wchar_t> -join(const Range& range, wstring_view sep) { +arg_join<detail::iterator_t<Range>, detail::sentinel_t<Range>, wchar_t> join( + Range&& range, wstring_view sep) { return join(std::begin(range), std::end(range), sep); } @@ -3437,7 +3738,7 @@ inline std::string to_string(T value) { // The buffer should be large enough to store the number including the sign or // "false" for bool. constexpr int max_size = detail::digits10<T>() + 2; - char buffer[max_size > 5 ? max_size : 5]; + char buffer[max_size > 5 ? static_cast<unsigned>(max_size) : 5]; char* begin = buffer; return std::string(begin, detail::write<char>(begin, value)); } @@ -3457,18 +3758,30 @@ std::basic_string<Char> to_string(const basic_memory_buffer<Char, SIZE>& buf) { } template <typename Char> -typename buffer_context<Char>::iterator detail::vformat_to( +void detail::vformat_to( detail::buffer<Char>& buf, basic_string_view<Char> format_str, - basic_format_args<buffer_context<type_identity_t<Char>>> args) { - using af = arg_formatter<typename buffer_context<Char>::iterator, Char>; - return vformat_to<af>(std::back_inserter(buf), to_string_view(format_str), - args); + basic_format_args<buffer_context<type_identity_t<Char>>> args, + detail::locale_ref loc) { + using iterator = typename buffer_context<Char>::iterator; + auto out = buffer_appender<Char>(buf); + if (format_str.size() == 2 && equal2(format_str.data(), "{}")) { + auto arg = args.get(0); + if (!arg) error_handler().on_error("argument not found"); + visit_format_arg(default_arg_formatter<iterator, Char>{out, args, loc}, + arg); + return; + } + format_handler<iterator, Char, buffer_context<Char>> h(out, format_str, args, + 
loc); + parse_format_string<false>(format_str, h); } #ifndef FMT_HEADER_ONLY -extern template format_context::iterator detail::vformat_to( - detail::buffer<char>&, string_view, basic_format_args<format_context>); +extern template void detail::vformat_to(detail::buffer<char>&, string_view, + basic_format_args<format_context>, + detail::locale_ref); namespace detail { + extern template FMT_API std::string grouping_impl<char>(locale_ref loc); extern template FMT_API std::string grouping_impl<wchar_t>(locale_ref loc); extern template FMT_API char thousands_sep_impl<char>(locale_ref loc); @@ -3494,7 +3807,7 @@ extern template int snprintf_float<long double>(long double value, template <typename S, typename Char = char_t<S>, FMT_ENABLE_IF(detail::is_string<S>::value)> -inline typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to( +inline void vformat_to( detail::buffer<Char>& buf, const S& format_str, basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args) { return detail::vformat_to(buf, to_string_view(format_str), args); @@ -3504,10 +3817,9 @@ template <typename S, typename... Args, size_t SIZE = inline_buffer_size, typename Char = enable_if_t<detail::is_string<S>::value, char_t<S>>> inline typename buffer_context<Char>::iterator format_to( basic_memory_buffer<Char, SIZE>& buf, const S& format_str, Args&&... 
args) { - detail::check_format_string<Args...>(format_str); - using context = buffer_context<Char>; - return detail::vformat_to(buf, to_string_view(format_str), - make_format_args<context>(args...)); + const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...); + detail::vformat_to(buf, to_string_view(format_str), vargs); + return detail::buffer_appender<Char>(buf); } template <typename OutputIt, typename Char = char> @@ -3516,88 +3828,17 @@ using format_context_t = basic_format_context<OutputIt, Char>; template <typename OutputIt, typename Char = char> using format_args_t = basic_format_args<format_context_t<OutputIt, Char>>; -template < - typename S, typename OutputIt, typename... Args, - FMT_ENABLE_IF(detail::is_output_iterator<OutputIt>::value && - !detail::is_contiguous_back_insert_iterator<OutputIt>::value)> -inline OutputIt vformat_to( - OutputIt out, const S& format_str, - format_args_t<type_identity_t<OutputIt>, char_t<S>> args) { - using af = detail::arg_formatter<OutputIt, char_t<S>>; - return vformat_to<af>(out, to_string_view(format_str), args); -} - -/** - \rst - Formats arguments, writes the result to the output iterator ``out`` and returns - the iterator past the end of the output range. - - **Example**:: - - std::vector<char> out; - fmt::format_to(std::back_inserter(out), "{}", 42); - \endrst - */ -template <typename OutputIt, typename S, typename... Args, - FMT_ENABLE_IF( - detail::is_output_iterator<OutputIt>::value && - !detail::is_contiguous_back_insert_iterator<OutputIt>::value && - detail::is_string<S>::value)> -inline OutputIt format_to(OutputIt out, const S& format_str, Args&&... args) { - detail::check_format_string<Args...>(format_str); - using context = format_context_t<OutputIt, char_t<S>>; - return vformat_to(out, to_string_view(format_str), - make_format_args<context>(args...)); -} - -template <typename OutputIt> struct format_to_n_result { - /** Iterator past the end of the output range. 
*/ - OutputIt out; - /** Total (not truncated) output size. */ - size_t size; -}; - template <typename OutputIt, typename Char = typename OutputIt::value_type> -using format_to_n_context = - format_context_t<detail::truncating_iterator<OutputIt>, Char>; +using format_to_n_context FMT_DEPRECATED_ALIAS = buffer_context<Char>; template <typename OutputIt, typename Char = typename OutputIt::value_type> -using format_to_n_args = basic_format_args<format_to_n_context<OutputIt, Char>>; +using format_to_n_args FMT_DEPRECATED_ALIAS = + basic_format_args<buffer_context<Char>>; template <typename OutputIt, typename Char, typename... Args> -inline format_arg_store<format_to_n_context<OutputIt, Char>, Args...> +FMT_DEPRECATED format_arg_store<buffer_context<Char>, Args...> make_format_to_n_args(const Args&... args) { - return format_arg_store<format_to_n_context<OutputIt, Char>, Args...>( - args...); -} - -template <typename OutputIt, typename Char, typename... Args, - FMT_ENABLE_IF(detail::is_output_iterator<OutputIt>::value)> -inline format_to_n_result<OutputIt> vformat_to_n( - OutputIt out, size_t n, basic_string_view<Char> format_str, - format_to_n_args<type_identity_t<OutputIt>, type_identity_t<Char>> args) { - auto it = vformat_to(detail::truncating_iterator<OutputIt>(out, n), - format_str, args); - return {it.base(), it.count()}; -} - -/** - \rst - Formats arguments, writes up to ``n`` characters of the result to the output - iterator ``out`` and returns the total output size and the iterator past the - end of the output range. - \endrst - */ -template <typename OutputIt, typename S, typename... Args, - FMT_ENABLE_IF(detail::is_string<S>::value&& - detail::is_output_iterator<OutputIt>::value)> -inline format_to_n_result<OutputIt> format_to_n(OutputIt out, size_t n, - const S& format_str, - const Args&... 
args) { - detail::check_format_string<Args...>(format_str); - using context = format_to_n_context<OutputIt, char_t<S>>; - return vformat_to_n(out, n, to_string_view(format_str), - make_format_args<context>(args...)); + return format_arg_store<buffer_context<Char>, Args...>(args...); } template <typename Char, enable_if_t<(!std::is_same<Char, char>::value), int>> @@ -3609,15 +3850,6 @@ std::basic_string<Char> detail::vformat( return to_string(buffer); } -/** - Returns the number of characters in the output of - ``format(format_str, args...)``. - */ -template <typename... Args> -inline size_t formatted_size(string_view format_str, const Args&... args) { - return format_to(detail::counting_iterator(), format_str, args...).count(); -} - template <typename Char, FMT_ENABLE_IF(std::is_same<Char, wchar_t>::value)> void vprint(std::FILE* f, basic_string_view<Char> format_str, wformat_args args) { @@ -3642,8 +3874,7 @@ template <typename Char, Char... CHARS> class udl_formatter { template <typename... Args> std::basic_string<Char> operator()(Args&&... args) const { static FMT_CONSTEXPR_DECL Char s[] = {CHARS..., '\0'}; - check_format_string<remove_cvref_t<Args>...>(FMT_STRING(s)); - return format(s, std::forward<Args>(args)...); + return format(FMT_STRING(s), std::forward<Args>(args)...); } }; # else diff --git a/src/third_party/format.cpp b/src/third_party/format.cpp index a64a1f3..6141d96 100644 --- a/src/third_party/format.cpp +++ b/src/third_party/format.cpp @@ -23,6 +23,36 @@ int format_float(char* buf, std::size_t size, const char* format, int precision, return precision < 0 ? snprintf_ptr(buf, size, format, value) : snprintf_ptr(buf, size, format, precision, value); } + +template FMT_API dragonbox::decimal_fp<float> dragonbox::to_decimal(float x) + FMT_NOEXCEPT; +template FMT_API dragonbox::decimal_fp<double> dragonbox::to_decimal(double x) + FMT_NOEXCEPT; + +// DEPRECATED! This function exists for ABI compatibility. 
+template <typename Char> +typename basic_format_context<std::back_insert_iterator<buffer<Char>>, + Char>::iterator +vformat_to(buffer<Char>& buf, basic_string_view<Char> format_str, + basic_format_args<basic_format_context< + std::back_insert_iterator<buffer<type_identity_t<Char>>>, + type_identity_t<Char>>> + args) { + using iterator = std::back_insert_iterator<buffer<char>>; + using context = basic_format_context< + std::back_insert_iterator<buffer<type_identity_t<Char>>>, + type_identity_t<Char>>; + auto out = iterator(buf); + format_handler<iterator, Char, context> h(out, format_str, args, {}); + parse_format_string<false>(format_str, h); + return out; +} +template basic_format_context<std::back_insert_iterator<buffer<char>>, + char>::iterator +vformat_to(buffer<char>&, string_view, + basic_format_args<basic_format_context< + std::back_insert_iterator<buffer<type_identity_t<char>>>, + type_identity_t<char>>>); } // namespace detail template struct FMT_INSTANTIATION_DEF_API detail::basic_data<void>; @@ -44,9 +74,9 @@ template FMT_API char detail::decimal_point_impl(locale_ref); template FMT_API void detail::buffer<char>::append(const char*, const char*); -template FMT_API FMT_BUFFER_CONTEXT(char)::iterator detail::vformat_to( +template FMT_API void detail::vformat_to( detail::buffer<char>&, string_view, - basic_format_args<FMT_BUFFER_CONTEXT(char)>); + basic_format_args<FMT_BUFFER_CONTEXT(char)>, detail::locale_ref); template FMT_API int detail::snprintf_float(double, int, detail::float_specs, detail::buffer<char>&); diff --git a/src/third_party/nonstd/optional.hpp b/src/third_party/nonstd/optional.hpp index 33a9b98..8b371e5 100644 --- a/src/third_party/nonstd/optional.hpp +++ b/src/third_party/nonstd/optional.hpp @@ -12,7 +12,7 @@ #define NONSTD_OPTIONAL_LITE_HPP #define optional_lite_MAJOR 3 -#define optional_lite_MINOR 2 +#define optional_lite_MINOR 4 #define optional_lite_PATCH 0 #define optional_lite_VERSION optional_STRINGIFY(optional_lite_MAJOR) "." 
optional_STRINGIFY(optional_lite_MINOR) "." optional_STRINGIFY(optional_lite_PATCH) @@ -26,6 +26,20 @@ #define optional_OPTIONAL_NONSTD 1 #define optional_OPTIONAL_STD 2 +// tweak header support: + +#ifdef __has_include +# if __has_include(<nonstd/optional.tweak.hpp>) +# include <nonstd/optional.tweak.hpp> +# endif +#define optional_HAVE_TWEAK_HEADER 1 +#else +#define optional_HAVE_TWEAK_HEADER 0 +//# pragma message("optional.hpp: Note: Tweak header not supported.") +#endif + +// optional selection and configuration: + #if !defined( optional_CONFIG_SELECT_OPTIONAL ) # define optional_CONFIG_SELECT_OPTIONAL ( optional_HAVE_STD_OPTIONAL ? optional_OPTIONAL_STD : optional_OPTIONAL_NONSTD ) #endif @@ -33,7 +47,10 @@ // Control presence of exception handling (try and auto discover): #ifndef optional_CONFIG_NO_EXCEPTIONS -# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND) +# if _MSC_VER +# include <cstddef> // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) # define optional_CONFIG_NO_EXCEPTIONS 0 # else # define optional_CONFIG_NO_EXCEPTIONS 1 @@ -227,16 +244,17 @@ namespace nonstd { // Compiler versions: // -// MSVC++ 6.0 _MSC_VER == 1200 (Visual Studio 6.0) -// MSVC++ 7.0 _MSC_VER == 1300 (Visual Studio .NET 2002) -// MSVC++ 7.1 _MSC_VER == 1310 (Visual Studio .NET 2003) -// MSVC++ 8.0 _MSC_VER == 1400 (Visual Studio 2005) -// MSVC++ 9.0 _MSC_VER == 1500 (Visual Studio 2008) -// MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010) -// MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012) -// MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013) -// MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015) -// MSVC++ 14.1 _MSC_VER >= 1910 (Visual Studio 2017) +// MSVC++ 6.0 _MSC_VER == 1200 optional_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 optional_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 optional_COMPILER_MSVC_VERSION 
== 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 _MSC_VER == 1400 optional_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 optional_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 optional_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 optional_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 optional_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) +// MSVC++ 14.0 _MSC_VER == 1900 optional_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// MSVC++ 14.1 _MSC_VER >= 1910 optional_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 optional_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) #if defined(_MSC_VER ) && !defined(__clang__) # define optional_COMPILER_MSVC_VER (_MSC_VER ) @@ -295,13 +313,26 @@ namespace nonstd { #define optional_CPP14_000 (optional_CPP14_OR_GREATER) #define optional_CPP17_000 (optional_CPP17_OR_GREATER) +// gcc >= 4.9, msvc >= vc14.1 (vs17): +#define optional_CPP11_140_G490 ((optional_CPP11_OR_GREATER_ && optional_COMPILER_GNUC_VERSION >= 490) || (optional_COMPILER_MSVC_VER >= 1910)) + +// clang >= 3.5, msvc >= vc11 (vs12): +#define optional_CPP11_110_C350 ( optional_CPP11_110 && !optional_BETWEEN( optional_COMPILER_CLANG_VERSION, 1, 350 ) ) + +// clang >= 3.5, gcc >= 5.0, msvc >= vc11 (vs12): +#define optional_CPP11_110_C350_G500 \ + ( optional_CPP11_110 && \ + !( optional_BETWEEN( optional_COMPILER_CLANG_VERSION, 1, 350 ) \ + || optional_BETWEEN( optional_COMPILER_GNUC_VERSION , 1, 500 ) ) ) + // Presence of C++11 language features: #define optional_HAVE_CONSTEXPR_11 optional_CPP11_140 #define optional_HAVE_IS_DEFAULT optional_CPP11_140 #define optional_HAVE_NOEXCEPT optional_CPP11_140 #define optional_HAVE_NULLPTR optional_CPP11_100 -#define optional_HAVE_REF_QUALIFIER optional_CPP11_140 +#define optional_HAVE_REF_QUALIFIER optional_CPP11_140_G490 +#define 
optional_HAVE_INITIALIZER_LIST optional_CPP11_140 // Presence of C++14 language features: @@ -320,6 +351,13 @@ namespace nonstd { #define optional_HAVE_TR1_TYPE_TRAITS (!! optional_COMPILER_GNUC_VERSION ) #define optional_HAVE_TR1_ADD_POINTER (!! optional_COMPILER_GNUC_VERSION ) +#define optional_HAVE_IS_ASSIGNABLE optional_CPP11_110_C350 +#define optional_HAVE_IS_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350 +#define optional_HAVE_IS_NOTHROW_MOVE_ASSIGNABLE optional_CPP11_110_C350 +#define optional_HAVE_IS_NOTHROW_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350 +#define optional_HAVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE optional_CPP11_110_C350_G500 +#define optional_HAVE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350_G500 + // C++ feature usage: #if optional_HAVE( CONSTEXPR_11 ) @@ -397,7 +435,7 @@ namespace nonstd { template< bool B = (__VA_ARGS__), typename std::enable_if<B, int>::type = 0 > #define optional_REQUIRES_T(...) \ - , typename = typename std::enable_if< (__VA_ARGS__), nonstd::optional_lite::detail::enabler >::type + , typename std::enable_if< (__VA_ARGS__), int >::type = 0 #define optional_REQUIRES_R(R, ...) 
\ typename std::enable_if< (__VA_ARGS__), R>::type @@ -415,6 +453,12 @@ namespace nonstd { namespace optional_lite { namespace std11 { +template< class T, T v > struct integral_constant { enum { value = v }; }; +template< bool B > struct bool_constant : integral_constant<bool, B>{}; + +typedef bool_constant< true > true_type; +typedef bool_constant< false > false_type; + #if optional_CPP11_OR_GREATER using std::move; #else @@ -428,6 +472,42 @@ namespace std11 { template< typename T, typename F > struct conditional<false, T, F> { typedef F type; }; #endif // optional_HAVE_CONDITIONAL +#if optional_HAVE( IS_ASSIGNABLE ) + using std::is_assignable; +#else + template< class T, class U > struct is_assignable : std11::true_type{}; +#endif + +#if optional_HAVE( IS_MOVE_CONSTRUCTIBLE ) + using std::is_move_constructible; +#else + template< class T > struct is_move_constructible : std11::true_type{}; +#endif + +#if optional_HAVE( IS_NOTHROW_MOVE_ASSIGNABLE ) + using std::is_nothrow_move_assignable; +#else + template< class T > struct is_nothrow_move_assignable : std11::true_type{}; +#endif + +#if optional_HAVE( IS_NOTHROW_MOVE_CONSTRUCTIBLE ) + using std::is_nothrow_move_constructible; +#else + template< class T > struct is_nothrow_move_constructible : std11::true_type{}; +#endif + +#if optional_HAVE( IS_TRIVIALLY_COPY_CONSTRUCTIBLE ) + using std::is_trivially_copy_constructible; +#else + template< class T > struct is_trivially_copy_constructible : std11::true_type{}; +#endif + +#if optional_HAVE( IS_TRIVIALLY_MOVE_CONSTRUCTIBLE ) + using std::is_trivially_move_constructible; +#else + template< class T > struct is_trivially_move_constructible : std11::true_type{}; +#endif + } // namespace std11 #if optional_CPP11_OR_GREATER @@ -450,10 +530,10 @@ using std::swap; struct is_swappable { template< typename T, typename = decltype( swap( std::declval<T&>(), std::declval<T&>() ) ) > - static std::true_type test( int /*unused*/ ); + static std11::true_type test( int /*unused*/ ); 
template< typename > - static std::false_type test(...); + static std11::false_type test(...); }; struct is_nothrow_swappable @@ -467,10 +547,10 @@ struct is_nothrow_swappable } template< typename T > - static auto test( int /*unused*/ ) -> std::integral_constant<bool, satisfies<T>()>{} + static auto test( int /*unused*/ ) -> std11::integral_constant<bool, satisfies<T>()>{} template< typename > - static auto test(...) -> std::false_type; + static auto test(...) -> std11::false_type; }; } // namespace detail @@ -508,12 +588,6 @@ class optional; namespace detail { -// for optional_REQUIRES_T - -#if optional_CPP11_OR_GREATER -enum class enabler{}; -#endif - // C++11 emulation: struct nulltype{}; @@ -705,6 +779,12 @@ union storage_t } template< class... Args > + storage_t( nonstd_lite_in_place_t(T), Args&&... args ) + { + emplace( std::forward<Args>(args)... ); + } + + template< class... Args > void emplace( Args&&... args ) { ::new( value_ptr() ) value_type( std::forward<Args>(args)... ); @@ -743,7 +823,7 @@ union storage_t return * value_ptr(); } -#if optional_CPP11_OR_GREATER +#if optional_HAVE( REF_QUALIFIER ) optional_nodiscard value_type const && value() const optional_refref_qual { @@ -861,13 +941,15 @@ public: {} // 2 - copy-construct - optional_constexpr14 optional( optional const & other #if optional_CPP11_OR_GREATER - optional_REQUIRES_A( - true || std::is_copy_constructible<T>::value - ) + // template< typename U = T + // optional_REQUIRES_T( + // std::is_copy_constructible<U>::value + // || std11::is_trivially_copy_constructible<U>::value + // ) + // > #endif - ) + optional_constexpr14 optional( optional const & other ) : has_value_( other.has_value() ) { if ( other.has_value() ) @@ -879,12 +961,15 @@ public: #if optional_CPP11_OR_GREATER // 3 (C++11) - move-construct from optional - optional_constexpr14 optional( optional && other - optional_REQUIRES_A( - true || std::is_move_constructible<T>::value + template< typename U = T + optional_REQUIRES_T( + 
std11::is_move_constructible<U>::value + || std11::is_trivially_move_constructible<U>::value ) - // NOLINTNEXTLINE( performance-noexcept-move-constructor ) - ) noexcept( std::is_nothrow_move_constructible<T>::value ) + > + optional_constexpr14 optional( optional && other ) + // NOLINTNEXTLINE( performance-noexcept-move-constructor ) + noexcept( std11::is_nothrow_move_constructible<T>::value ) : has_value_( other.has_value() ) { if ( other.has_value() ) @@ -894,9 +979,8 @@ public: } // 4a (C++11) - explicit converting copy-construct from optional - template< typename U > - explicit optional( optional<U> const & other - optional_REQUIRES_A( + template< typename U + optional_REQUIRES_T( std::is_constructible<T, U const &>::value && !std::is_constructible<T, optional<U> & >::value && !std::is_constructible<T, optional<U> && >::value @@ -908,7 +992,8 @@ public: && !std::is_convertible< optional<U> const &&, T>::value && !std::is_convertible< U const & , T>::value /*=> explicit */ ) - ) + > + explicit optional( optional<U> const & other ) : has_value_( other.has_value() ) { if ( other.has_value() ) @@ -919,11 +1004,9 @@ public: #endif // optional_CPP11_OR_GREATER // 4b (C++98 and later) - non-explicit converting copy-construct from optional - template< typename U > - // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions ) - optional( optional<U> const & other + template< typename U #if optional_CPP11_OR_GREATER - optional_REQUIRES_A( + optional_REQUIRES_T( std::is_constructible<T, U const &>::value && !std::is_constructible<T, optional<U> & >::value && !std::is_constructible<T, optional<U> && >::value @@ -936,7 +1019,9 @@ public: && std::is_convertible< U const & , T>::value /*=> non-explicit */ ) #endif // optional_CPP11_OR_GREATER - ) + > + // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions ) + /*non-explicit*/ optional( optional<U> const & other ) : has_value_( other.has_value() ) { if ( other.has_value() ) @@ -948,9 
+1033,8 @@ public: #if optional_CPP11_OR_GREATER // 5a (C++11) - explicit converting move-construct from optional - template< typename U > - explicit optional( optional<U> && other - optional_REQUIRES_A( + template< typename U + optional_REQUIRES_T( std::is_constructible<T, U &&>::value && !std::is_constructible<T, optional<U> & >::value && !std::is_constructible<T, optional<U> && >::value @@ -962,6 +1046,8 @@ public: && !std::is_convertible< optional<U> const &&, T>::value && !std::is_convertible< U &&, T>::value /*=> explicit */ ) + > + explicit optional( optional<U> && other ) : has_value_( other.has_value() ) { @@ -972,10 +1058,8 @@ public: } // 5a (C++11) - non-explicit converting move-construct from optional - template< typename U > - // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions ) - optional( optional<U> && other - optional_REQUIRES_A( + template< typename U + optional_REQUIRES_T( std::is_constructible<T, U &&>::value && !std::is_constructible<T, optional<U> & >::value && !std::is_constructible<T, optional<U> && >::value @@ -987,7 +1071,9 @@ public: && !std::is_convertible< optional<U> const &&, T>::value && std::is_convertible< U &&, T>::value /*=> non-explicit */ ) - ) + > + // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions ) + /*non-explicit*/ optional( optional<U> && other ) : has_value_( other.has_value() ) { if ( other.has_value() ) @@ -1019,32 +1105,32 @@ public: {} // 8a (C++11) - explicit move construct from value - template< typename U = value_type > - optional_constexpr explicit optional( U && value - optional_REQUIRES_A( + template< typename U = T + optional_REQUIRES_T( std::is_constructible<T, U&&>::value && !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value && !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value && !std::is_convertible<U&&, T>::value /*=> explicit */ ) - ) + > + optional_constexpr explicit optional( U && value ) : 
has_value_( true ) - , contained( T{ std::forward<U>( value ) } ) + , contained( nonstd_lite_in_place(T), std::forward<U>( value ) ) {} // 8b (C++11) - non-explicit move construct from value - template< typename U = value_type > - // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions ) - optional_constexpr optional( U && value - optional_REQUIRES_A( + template< typename U = T + optional_REQUIRES_T( std::is_constructible<T, U&&>::value && !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value && !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value && std::is_convertible<U&&, T>::value /*=> non-explicit */ ) - ) + > + // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions ) + optional_constexpr /*non-explicit*/ optional( U && value ) : has_value_( true ) - , contained( std::forward<U>( value ) ) + , contained( nonstd_lite_in_place(T), std::forward<U>( value ) ) {} #else // optional_CPP11_OR_GREATER @@ -1087,8 +1173,8 @@ public: ) operator=( optional const & other ) noexcept( - std::is_nothrow_move_assignable<T>::value - && std::is_nothrow_move_constructible<T>::value + std11::is_nothrow_move_assignable<T>::value + && std11::is_nothrow_move_constructible<T>::value ) #else optional & operator=( optional const & other ) @@ -1107,7 +1193,7 @@ public: optional_REQUIRES_R( optional &, true -// std::is_move_constructible<T>::value +// std11::is_move_constructible<T>::value // && std::is_move_assignable<T>::value ) operator=( optional && other ) noexcept @@ -1124,7 +1210,7 @@ public: optional_REQUIRES_R( optional &, std::is_constructible<T , U>::value - && std::is_assignable<T&, U>::value + && std11::is_assignable<T&, U>::value && !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value && !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value && !(std::is_scalar<T>::value && std::is_same<T, typename std::decay<U>::type>::value) @@ 
-1162,7 +1248,7 @@ public: optional_REQUIRES_R( optional&, std::is_constructible< T , U const &>::value - && std::is_assignable< T&, U const &>::value + && std11::is_assignable< T&, U const &>::value && !std::is_constructible<T, optional<U> & >::value && !std::is_constructible<T, optional<U> && >::value && !std::is_constructible<T, optional<U> const & >::value @@ -1171,10 +1257,10 @@ public: && !std::is_convertible< optional<U> && , T>::value && !std::is_convertible< optional<U> const & , T>::value && !std::is_convertible< optional<U> const &&, T>::value - && !std::is_assignable< T&, optional<U> & >::value - && !std::is_assignable< T&, optional<U> && >::value - && !std::is_assignable< T&, optional<U> const & >::value - && !std::is_assignable< T&, optional<U> const && >::value + && !std11::is_assignable< T&, optional<U> & >::value + && !std11::is_assignable< T&, optional<U> && >::value + && !std11::is_assignable< T&, optional<U> const & >::value + && !std11::is_assignable< T&, optional<U> const && >::value ) #else optional& @@ -1192,7 +1278,7 @@ public: optional_REQUIRES_R( optional&, std::is_constructible< T , U>::value - && std::is_assignable< T&, U>::value + && std11::is_assignable< T&, U>::value && !std::is_constructible<T, optional<U> & >::value && !std::is_constructible<T, optional<U> && >::value && !std::is_constructible<T, optional<U> const & >::value @@ -1201,10 +1287,10 @@ public: && !std::is_convertible< optional<U> && , T>::value && !std::is_convertible< optional<U> const & , T>::value && !std::is_convertible< optional<U> const &&, T>::value - && !std::is_assignable< T&, optional<U> & >::value - && !std::is_assignable< T&, optional<U> && >::value - && !std::is_assignable< T&, optional<U> const & >::value - && !std::is_assignable< T&, optional<U> const && >::value + && !std11::is_assignable< T&, optional<U> & >::value + && !std11::is_assignable< T&, optional<U> && >::value + && !std11::is_assignable< T&, optional<U> const & >::value + && 
!std11::is_assignable< T&, optional<U> const && >::value ) operator=( optional<U> && other ) { @@ -1246,7 +1332,7 @@ public: void swap( optional & other ) #if optional_CPP11_OR_GREATER noexcept( - std::is_nothrow_move_constructible<T>::value + std11::is_nothrow_move_constructible<T>::value && std17::is_nothrow_swappable<T>::value ) #endif @@ -1283,7 +1369,7 @@ public: contained.value(); } -#if optional_HAVE( REF_QUALIFIER ) && ( !optional_COMPILER_GNUC_VERSION || optional_COMPILER_GNUC_VERSION >= 490 ) +#if optional_HAVE( REF_QUALIFIER ) optional_constexpr value_type const && operator *() const optional_refref_qual { @@ -1612,7 +1698,7 @@ inline optional_constexpr bool operator>=( U const & v, optional<T> const & x ) template< typename T #if optional_CPP11_OR_GREATER optional_REQUIRES_T( - std::is_move_constructible<T>::value + std11::is_move_constructible<T>::value && std17::is_swappable<T>::value ) #endif > @@ -1659,7 +1745,10 @@ optional<T> make_optional( T const & value ) using optional_lite::optional; using optional_lite::nullopt_t; using optional_lite::nullopt; + +#if ! optional_CONFIG_NO_EXCEPTIONS using optional_lite::bad_optional_access; +#endif using optional_lite::make_optional; diff --git a/src/third_party/win32/mktemp.c b/src/third_party/win32/mktemp.c new file mode 100644 index 0000000..8963b89 --- /dev/null +++ b/src/third_party/win32/mktemp.c @@ -0,0 +1,260 @@ +/* $OpenBSD: mktemp.c,v 1.39 2017/11/28 06:55:49 tb Exp $ */ +/* + * Copyright (c) 1996-1998, 2008 Theo de Raadt + * Copyright (c) 1997, 2008-2009 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef _WIN32 +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 // _WIN32_WINNT_VISTA +#endif + +#ifndef _CRT_NONSTDC_NO_DEPRECATE +#define _CRT_NONSTDC_NO_DEPRECATE +#endif + +#ifndef _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_WARNINGS +#endif +#endif + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#include <direct.h> +#include <io.h> + +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX 1 +#define WIN32_NO_STATUS +#include <windows.h> +#undef WIN32_NO_STATUS +#include <ntstatus.h> + +// Work-around wrong calling convention for RtlGenRandom in old mingw-w64 +#define SystemFunction036 __stdcall SystemFunction036 +#include <ntsecapi.h> +#undef SystemFunction036 +#endif + +#ifdef _MSC_VER +#define S_IRUSR (_S_IREAD) +#define S_IWUSR (_S_IWRITE) +#endif + +#define MKTEMP_NAME 0 +#define MKTEMP_FILE 1 +#define MKTEMP_DIR 2 + +#define TEMPCHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" +#define NUM_CHARS (sizeof(TEMPCHARS) - 1) +#define MIN_X 6 + +#ifdef _WIN32 +#define MKOTEMP_FLAGS (_O_APPEND|_O_NOINHERIT|_O_BINARY|_O_TEXT| \ + _O_U16TEXT|_O_U8TEXT|_O_WTEXT) +#define MKTEMP_FLAGS_DEFAULT (_O_BINARY) +#else +#define MKOTEMP_FLAGS (O_APPEND|O_CLOEXEC|O_DSYNC|O_RSYNC|O_SYNC) +#define MKTEMP_FLAGS_DEFAULT (0) +#endif + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + +#ifdef _WIN32 +static BOOL CALLBACK +lookup_ntdll_function_once( + PINIT_ONCE init_once, PVOID parameter, PVOID *context) +{ + (void)init_once; + *context = 
(PVOID)GetProcAddress( + GetModuleHandleA("ntdll.dll"), parameter); + return(TRUE); +} + +static NTSTATUS +GetLastNtStatus() +{ + static INIT_ONCE init_once = INIT_ONCE_STATIC_INIT; + typedef NTSTATUS(NTAPI * RtlGetLastNtStatus_t)(void); + RtlGetLastNtStatus_t get_last_nt_status = NULL; + InitOnceExecuteOnce(&init_once, lookup_ntdll_function_once, + "RtlGetLastNtStatus", (LPVOID *)&get_last_nt_status); + return(get_last_nt_status()); +} + +static int +normalize_msvcrt_errno(int ret) +{ + if (ret == -1 && errno == EACCES && _doserrno == ERROR_ACCESS_DENIED) { + /* + * Win32 APIs return ERROR_ACCESS_DENIED for many distinct + * NTSTATUS codes, even when it's arguably inappropriate to do + * so, e.g. if you attempt to open a directory, or open a file + * that's in the "pending delete" state. These are mapped to + * EACCESS in the C runtime. We instead map these to EEXIST. + */ + NTSTATUS nt_err = GetLastNtStatus(); + if (nt_err == STATUS_FILE_IS_A_DIRECTORY || + nt_err == STATUS_DELETE_PENDING) { + errno = EEXIST; + } + } + return(ret); +} + +#define open(...) 
(normalize_msvcrt_errno(open(__VA_ARGS__))) +#define mkdir(path, mode) (normalize_msvcrt_errno(mkdir(path))) +#define lstat(path, sb) (normalize_msvcrt_errno(stat(path, sb))) + +static void (*_bsd_mkstemp_random_source)(void *buf, size_t n); + +void +bsd_mkstemp_set_random_source(void (*f)(void *buf, size_t n)) +{ + _bsd_mkstemp_random_source = f; +} + +static void +arc4random_buf(void *buf, size_t nbytes) +{ + if (_bsd_mkstemp_random_source != NULL) { + _bsd_mkstemp_random_source(buf, nbytes); + } else { + RtlGenRandom(buf, (ULONG)nbytes); + } +} +#endif + +static int +mktemp_internal(char *path, int slen, int mode, int flags) +{ + char *start, *cp, *ep; + const char tempchars[] = TEMPCHARS; + unsigned int tries; + struct stat sb; + size_t len; + int fd; + + len = strlen(path); + if (len < MIN_X || slen < 0 || (size_t)slen > len - MIN_X) { + errno = EINVAL; + return(-1); + } + ep = path + len - slen; + + for (start = ep; start > path && start[-1] == 'X'; start--) + ; + if (ep - start < MIN_X) { + errno = EINVAL; + return(-1); + } + + if (flags & ~MKOTEMP_FLAGS) { + errno = EINVAL; + return(-1); + } + flags |= O_CREAT|O_EXCL|O_RDWR; + + tries = INT_MAX; + do { + cp = start; + do { + unsigned short rbuf[16]; + unsigned int i; + + /* + * Avoid lots of arc4random() calls by using + * a buffer sized for up to 16 Xs at a time. + */ + arc4random_buf(rbuf, sizeof(rbuf)); + for (i = 0; i < nitems(rbuf) && cp != ep; i++) + *cp++ = tempchars[rbuf[i] % NUM_CHARS]; + } while (cp != ep); + + switch (mode) { + case MKTEMP_NAME: + if (lstat(path, &sb) != 0) + return(errno == ENOENT ? 
0 : -1); + break; + case MKTEMP_FILE: + fd = open(path, flags, S_IRUSR|S_IWUSR); + if (fd != -1 || errno != EEXIST) + return(fd); + break; + case MKTEMP_DIR: + if (mkdir(path, S_IRUSR|S_IWUSR|S_IXUSR) == 0) + return(0); + if (errno != EEXIST) + return(-1); + break; + } + } while (--tries); + + errno = EEXIST; + return(-1); +} + +char * +bsd_mktemp(char *path) +{ + if (mktemp_internal(path, 0, MKTEMP_NAME, MKTEMP_FLAGS_DEFAULT) == -1) + return(NULL); + return(path); +} + +int +bsd_mkostemps(char *path, int slen, int flags) +{ + return(mktemp_internal(path, slen, MKTEMP_FILE, flags)); +} + +int +bsd_mkstemp(char *path) +{ + return(mktemp_internal(path, 0, MKTEMP_FILE, MKTEMP_FLAGS_DEFAULT)); +} + +int +bsd_mkostemp(char *path, int flags) +{ + return(mktemp_internal(path, 0, MKTEMP_FILE, flags)); +} + +int +bsd_mkstemps(char *path, int slen) +{ + return(mktemp_internal(path, slen, MKTEMP_FILE, MKTEMP_FLAGS_DEFAULT)); +} + +char * +bsd_mkdtemp(char *path) +{ + int error; + + error = mktemp_internal(path, 0, MKTEMP_DIR, 0); + return(error ? NULL : path); +} diff --git a/src/third_party/win32/mktemp.h b/src/third_party/win32/mktemp.h new file mode 100644 index 0000000..40e0c16 --- /dev/null +++ b/src/third_party/win32/mktemp.h @@ -0,0 +1,18 @@ +#ifndef CCACHE_THIRD_PARTY_WIN32_MKTEMP_H_ +#define CCACHE_THIRD_PARTY_WIN32_MKTEMP_H_ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int bsd_mkstemp(char *); + +// Exposed for testing. 
+void bsd_mkstemp_set_random_source(void (*)(void *buf, size_t n)); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8e372b7..c33befd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -34,36 +34,37 @@ set_property( ${clean_files_prop_name} "${CMAKE_BINARY_DIR}/testdir") addtest(base) -addtest(nocpp2) -addtest(cpp1) -addtest(multi_arch) -addtest(serialize_diagnostics) +addtest(basedir) +addtest(cache_levels) +addtest(cleanup) addtest(color_diagnostics) -addtest(sanitize_blacklist) +addtest(cpp1) addtest(debug_prefix_map) -addtest(profiling) -addtest(profiling_gcc) -addtest(profiling_clang) -addtest(profiling_hip_clang) -addtest(split_dwarf) -addtest(masquerading) -addtest(hardlink) -addtest(fileclone) +addtest(depend) addtest(direct) addtest(direct_gcc) -addtest(depend) -addtest(basedir) -addtest(no_compression) -addtest(readonly) -addtest(readonly_direct) -addtest(cache_levels) -addtest(cleanup) -addtest(pch) -addtest(modules) -addtest(upgrade) +addtest(fileclone) +addtest(hardlink) +addtest(inode_cache) addtest(input_charset) +addtest(masquerading) +addtest(modules) +addtest(multi_arch) +addtest(no_compression) +addtest(nocpp2) addtest(nvcc) addtest(nvcc_direct) addtest(nvcc_ldir) addtest(nvcc_nocpp2) -addtest(inode_cache) +addtest(pch) +addtest(profiling) +addtest(profiling_clang) +addtest(profiling_gcc) +addtest(profiling_hip_clang) +addtest(readonly) +addtest(readonly_direct) +addtest(sanitize_blacklist) +addtest(serialize_diagnostics) +addtest(source_date_epoch) +addtest(split_dwarf) +addtest(upgrade) @@ -32,15 +32,15 @@ if [[ -t 1 ]]; then fi green() { - printf "$ansi_boldgreen$*$ansi_reset\n" + printf "$ansi_boldgreen%s$ansi_reset\n" "$*" } red() { - printf "$ansi_boldred$*$ansi_reset\n" + printf "$ansi_boldred%s$ansi_reset\n" "$*" } bold() { - printf "$ansi_bold$*$ansi_reset\n" + printf "$ansi_bold%s$ansi_reset\n" "$*" } test_failed() { @@ -81,9 +81,10 @@ find_compiler() { generate_code() { 
local nlines=$1 local outfile=$2 + local i rm -f $outfile - for i in $(seq $nlines); do + for ((i = 1; i <= nlines; i++)); do echo "int foo_$i(int x) { return x; }" >>$outfile done } @@ -116,7 +117,7 @@ backdate() { else m=0 fi - touch -t 1999010100$(printf "%02u" $m) "$@" + touch -t $((199901010000 + m)) "$@" } file_size() { @@ -148,7 +149,18 @@ objdump_grep_cmd() { expect_stat() { local stat="$1" local expected_value="$2" - local value="$(echo $($CCACHE -s | fgrep "$stat" | cut -c33-))" + local line + local value="" + + while IFS= read -r line; do + if [[ $line = *"$stat"* ]]; then + value="${line:32}" + # remove leading & trailing whitespace + value="${value#${value%%[![:space:]]*}}" + value="${value%${value##*[![:space:]]}}" + break + fi + done < <($CCACHE -s) if [ "$expected_value" != "$value" ]; then test_failed "Expected \"$stat\" to be $expected_value, actual $value" @@ -214,6 +226,22 @@ is_equal_object_files() { elfdump -a -w "$2".dump "$2" # these were the elfdump fields that seemed to differ (empirically) diff -I e_shoff -I sh_size -I st_name "$1".dump "$2".dump > /dev/null + elif $HOST_OS_WINDOWS && command -v dumpbin.exe >/dev/null; then + # Filter out fields that are affected by compilation time or source + # filename. 
+ local awk_filter=' + skip {--skip; next} + + /Dump of file/ {next} # dumbin header + /time date stamp/ {next} # incremental linker timestamp + /number of symbols/ {next} # symbol count + /Filename *\| \.file$/ {skip=1; next} # .file symbol + + {print} + ' + dumpbin.exe -all -nologo "$1" | awk "$awk_filter" > "$1".dump + dumpbin.exe -all -nologo "$2" | awk "$awk_filter" > "$2".dump + cmp -s "$1".dump "$2".dump else cmp -s "$1" "$2" fi @@ -309,11 +337,12 @@ expect_perm() { } reset_environment() { - while read name; do - unset $name - done <<EOF -$(env | sed -n 's/^\(CCACHE_[A-Z0-9_]*\)=.*$/\1/p') -EOF + while IFS= read -r name; do + if [[ $name =~ ^CCACHE_[A-Z0-9_]*$ ]]; then + unset $name + fi + done < <(compgen -e) + unset GCC_COLORS unset TERM unset XDG_CACHE_HOME @@ -438,7 +467,7 @@ case $compiler_version in ;; *clang*) COMPILER_TYPE_CLANG=true - CLANG_VERSION_SUFFIX=$(echo $COMPILER | sed 's/.*clang//') + CLANG_VERSION_SUFFIX=$(echo "${COMPILER%% *}" | sed 's/.*clang//') ;; *) echo "WARNING: Compiler $COMPILER not supported (version: $compiler_version) -- not running tests" >&2 @@ -479,12 +508,18 @@ else PATH_DELIM=":" fi +if [[ $OSTYPE = msys* ]]; then + # Native symlink support for Windows. + export MSYS="${MSYS:-} winsymlinks:nativestrict" +fi + if $HOST_OS_APPLE; then SDKROOT=$(xcrun --sdk macosx --show-sdk-path 2>/dev/null) if [ "$SDKROOT" = "" ]; then echo "Error: xcrun --show-sdk-path failure" exit 1 fi + export SDKROOT SYSROOT="-isysroot `echo \"$SDKROOT\" | sed 's/ /\\ /g'`" else @@ -493,7 +528,7 @@ fi # --------------------------------------- -all_suites="$(sed -rn 's/^addtest\((.*)\)$/\1/p' $(dirname $0)/CMakeLists.txt)" +all_suites="$(sed -En 's/^addtest\((.*)\)$/\1/p' $(dirname $0)/CMakeLists.txt)" for suite in $all_suites; do . 
$(dirname $0)/suites/$suite.bash diff --git a/test/suites/base.bash b/test/suites/base.bash index 75e2768..e16741c 100644 --- a/test/suites/base.bash +++ b/test/suites/base.bash @@ -46,7 +46,8 @@ base_tests() { # The exact output is not tested, but at least it's something human readable # and not random memory. - if [ $($CCACHE --version | grep -c '^ccache version [a-zA-Z0-9_./+-]*$') -ne 1 ]; then + local version_pattern=$'^ccache version [a-zA-Z0-9_./+-]*\r?$' + if [ $($CCACHE --version | grep -E -c "$version_pattern") -ne 1 ]; then test_failed "Unexpected output of --version" fi @@ -212,6 +213,7 @@ base_tests() { rm -rf src # ------------------------------------------------------------------------- +if ! $HOST_OS_WINDOWS; then TEST "Source file ending with dot" mkdir src @@ -230,6 +232,7 @@ base_tests() { rm foo.o rm -rf src +fi # ------------------------------------------------------------------------- TEST "Multiple file extensions" @@ -760,19 +763,23 @@ b" expect_stat 'files in cache' 1 expect_equal_object_files reference_test1.o test1.o - CCACHE_COMPILER=$COMPILER $CCACHE non_existing_compiler_will_be_overridden_anyway -c test1.c + CCACHE_COMPILER=$COMPILER_BIN $CCACHE \ + non_existing_compiler_will_be_overridden_anyway \ + $COMPILER_ARGS -c test1.c expect_stat 'cache hit (preprocessed)' 1 expect_stat 'cache miss' 1 expect_stat 'files in cache' 1 expect_equal_object_files reference_test1.o test1.o - CCACHE_COMPILER=$COMPILER $CCACHE same/for/relative -c test1.c + CCACHE_COMPILER=$COMPILER_BIN $CCACHE same/for/relative \ + $COMPILER_ARGS -c test1.c expect_stat 'cache hit (preprocessed)' 2 expect_stat 'cache miss' 1 expect_stat 'files in cache' 1 expect_equal_object_files reference_test1.o test1.o - CCACHE_COMPILER=$COMPILER $CCACHE /and/even/absolute/compilers -c test1.c + CCACHE_COMPILER=$COMPILER_BIN $CCACHE /and/even/absolute/compilers \ + $COMPILER_ARGS -c test1.c expect_stat 'cache hit (preprocessed)' 3 expect_stat 'cache miss' 1 expect_stat 'files in 
cache' 1 @@ -788,7 +795,7 @@ EOF chmod +x gcc CCACHE_DEBUG=1 $CCACHE ./gcc -c test1.c - compiler_type=$(sed -rn 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log) + compiler_type=$(sed -En 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log) if [ "$compiler_type" != gcc ]; then test_failed "Compiler type $compiler_type != gcc" fi @@ -796,7 +803,7 @@ EOF rm test1.o.ccache-log CCACHE_COMPILERTYPE=clang CCACHE_DEBUG=1 $CCACHE ./gcc -c test1.c - compiler_type=$(sed -rn 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log) + compiler_type=$(sed -En 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log) if [ "$compiler_type" != clang ]; then test_failed "Compiler type $compiler_type != clang" fi @@ -970,6 +977,7 @@ EOF # ------------------------------------------------------------------------- +if ! $HOST_OS_WINDOWS; then TEST "CCACHE_UMASK" saved_umask=$(umask) @@ -1028,6 +1036,7 @@ EOF expect_perm "$stats_file" -rw-rw-r-- umask $saved_umask +fi # ------------------------------------------------------------------------- TEST "No object file due to bad prefix" @@ -1086,6 +1095,17 @@ EOF expect_stat 'compiler produced empty output' 1 # ------------------------------------------------------------------------- + TEST "Output to /dev/null" + + $CCACHE_COMPILE -c test1.c + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + $CCACHE_COMPILE -c test1.c -o /dev/null + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 1 + + # ------------------------------------------------------------------------- TEST "Caching stderr" cat <<EOF >stderr.c @@ -1336,6 +1356,7 @@ EOF fi # ------------------------------------------------------------------------- +if ! 
$HOST_OS_WINDOWS; then TEST "UNCACHED_ERR_FD" cat >compiler.sh <<'EOF' @@ -1366,6 +1387,7 @@ EOF if [ "$stderr" != "2Pu1Cc" ]; then test_failed "Unexpected stderr: $stderr != 2Pu1Cc" fi +fi # ------------------------------------------------------------------------- TEST "Invalid boolean environment configuration options" diff --git a/test/suites/cache_levels.bash b/test/suites/cache_levels.bash index 776508b..ef2e8d5 100644 --- a/test/suites/cache_levels.bash +++ b/test/suites/cache_levels.bash @@ -11,7 +11,7 @@ expect_on_level() { local expected_level="$2" slashes=$(find $CCACHE_DIR -name "*$type" \ - | sed -r -e 's!.*\.ccache/!!' -e 's![^/]*$!!' -e 's![^/]!!g') + | sed -E -e 's!.*\.ccache/!!' -e 's![^/]*$!!' -e 's![^/]!!g') actual_level=$(echo -n "$slashes" | wc -c) if [ "$actual_level" -ne "$expected_level" ]; then test_failed "$type file on level $actual_level, expected level $expected_level" diff --git a/test/suites/cleanup.bash b/test/suites/cleanup.bash index 33cf02c..b2c53a5 100644 --- a/test/suites/cleanup.bash +++ b/test/suites/cleanup.bash @@ -1,10 +1,11 @@ prepare_cleanup_test_dir() { local dir=$1 + local i rm -rf $dir mkdir -p $dir - for i in $(seq 0 9); do - printf '%4017s' '' | tr ' ' 'A' >$dir/result${i}R + for ((i = 0; i < 10; ++i)); do + printf 'A%.0s' {1..4017} >$dir/result${i}R backdate $((3 * i + 1)) $dir/result${i}R done # NUMFILES: 10, TOTALSIZE: 13 KiB, MAXFILES: 0, MAXSIZE: 0 diff --git a/test/suites/color_diagnostics.bash b/test/suites/color_diagnostics.bash index 64c7d4f..4ec99f4 100644 --- a/test/suites/color_diagnostics.bash +++ b/test/suites/color_diagnostics.bash @@ -113,17 +113,32 @@ color_diagnostics_test() { expect_stat 'cache miss' 1 expect_stat 'cache hit (preprocessed)' 1 - # ------------------------------------------------------------------------- if $COMPILER_TYPE_GCC; then + # --------------------------------------------------------------------- TEST "-fcolor-diagnostics not accepted for GCC" generate_code 1 test.c + + if 
$CCACHE_COMPILE -fcolor-diagnostics -c test.c >&/dev/null; then + test_failed "-fcolor-diagnostics unexpectedly accepted by GCC" + fi + + # --------------------------------------------------------------------- + TEST "-fcolor-diagnostics not accepted for GCC for cached result" + + generate_code 1 test.c + + if ! $CCACHE_COMPILE -c test.c >&/dev/null; then + test_failed "unknown error compiling" + fi + if $CCACHE_COMPILE -fcolor-diagnostics -c test.c >&/dev/null; then test_failed "-fcolor-diagnostics unexpectedly accepted by GCC" fi fi while read -r case; do + # --------------------------------------------------------------------- TEST "Cache object shared across ${case} (run_second_cpp=$run_second_cpp)" color_diagnostics_generate_code test1.c diff --git a/test/suites/inode_cache.bash b/test/suites/inode_cache.bash index dc8d5f0..ef9c924 100644 --- a/test/suites/inode_cache.bash +++ b/test/suites/inode_cache.bash @@ -1,4 +1,9 @@ SUITE_inode_cache_PROBE() { + if $HOST_OS_WINDOWS; then + echo "inode cache not available on Windows" + return + fi + temp_dir=$(dirname $($CCACHE -k temporary_dir)) fs=$(stat -fLc %T $temp_dir) if [ "$fs" = "nfs" ]; then diff --git a/test/suites/nvcc.bash b/test/suites/nvcc.bash index d73623b..386015d 100644 --- a/test/suites/nvcc.bash +++ b/test/suites/nvcc.bash @@ -127,10 +127,10 @@ nvcc_tests() { expect_stat 'files in cache' 3 $cuobjdump test_cuda.o > test1.dump expect_equal_content reference_test3.dump test1.dump - + # ------------------------------------------------------------------------- TEST "Option -dc" - + $REAL_NVCC $nvcc_opts_cuda -dc -o reference_test4.o test_cuda.cu $cuobjdump reference_test4.o > reference_test4.dump diff --git a/test/suites/pch.bash b/test/suites/pch.bash index 5729575..97a5c5e 100644 --- a/test/suites/pch.bash +++ b/test/suites/pch.bash @@ -609,6 +609,29 @@ pch_suite_gcc() { expect_stat 'cache hit (direct)' 2 expect_stat 'cache hit (preprocessed)' 0 expect_stat 'cache miss' 1 + + # 
------------------------------------------------------------------------- + TEST "Too new PCH file" + + # If the precompiled header is too new we shouldn't cache the result at all + # since: + # + # - the precompiled header content must be included in the hash, but + # - we don't trust the precompiled header content so we can't hash it + # ourselves, and + # - the preprocessed output doesn't contain the preprocessed header content. + + touch lib.h + touch main.c + + $REAL_COMPILER $SYSROOT -c lib.h + touch -d "@$(($(date +%s) + 60))" lib.h.gch # 1 minute in the future + + CCACHE_SLOPPINESS="$DEFAULT_SLOPPINESS pch_defines,time_macros" $CCACHE_COMPILE -include lib.h -c main.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 0 + expect_stat "can't use precompiled header" 1 } pch_suite_clang() { diff --git a/test/suites/source_date_epoch.bash b/test/suites/source_date_epoch.bash new file mode 100644 index 0000000..d9d11ba --- /dev/null +++ b/test/suites/source_date_epoch.bash @@ -0,0 +1,99 @@ +SUITE_source_date_epoch_PROBE() { + echo 'char x[] = __DATE__;' >test.c + if ! 
SOURCE_DATE_EPOCH=0 $REAL_COMPILER -E test.c | grep -q 1970; then + echo "SOURCE_DATE_EPOCH not supported by compiler" + fi +} + +SUITE_source_date_epoch_SETUP() { + echo 'char x;' >without_temporal_macros.c + echo 'char x[] = __DATE__;' >with_date_macro.c + echo 'char x[] = __TIME__;' >with_time_macro.c +} + +SUITE_source_date_epoch() { + # ------------------------------------------------------------------------- + TEST "Without temporal macro" + + unset CCACHE_NODIRECT + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c without_temporal_macros.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c without_temporal_macros.c + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c without_temporal_macros.c + expect_stat 'cache hit (direct)' 2 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + # ------------------------------------------------------------------------- + TEST "With __DATE__ macro" + + unset CCACHE_NODIRECT + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_date_macro.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_date_macro.c + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_date_macro.c + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 1 + + # ------------------------------------------------------------------------- + TEST "With __TIME__ macro" + + unset CCACHE_NODIRECT + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE 
-c with_time_macro.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_time_macro.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 2 + + # ------------------------------------------------------------------------- + TEST "With __TIME__ and time_macros sloppiness" + + unset CCACHE_NODIRECT + + CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_time_macro.c + expect_stat 'cache hit (direct)' 2 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c + expect_stat 'cache hit (direct)' 2 + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 1 +} diff --git a/test/suites/split_dwarf.bash b/test/suites/split_dwarf.bash index d8c3805..28a3293 100644 --- a/test/suites/split_dwarf.bash +++ b/test/suites/split_dwarf.bash @@ -142,4 +142,38 @@ SUITE_split_dwarf() { elif [ ! 
-f reference.dwo ] && [ -f test.dwo ]; then test_failed ".dwo not missing" fi + + # ------------------------------------------------------------------------- + TEST "Object file without dot" + + $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + expect_exists test.dwo + + rm test.dwo + + $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + expect_exists test.dwo + + # ------------------------------------------------------------------------- + TEST "Object file with two dots" + + $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test.x.y + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + expect_exists test.x.dwo + + rm test.x.dwo + + $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test.x.y + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + expect_exists test.x.dwo } diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index c82a226..48cf058 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -27,7 +27,7 @@ if(INODE_CACHE_SUPPORTED) endif() if(WIN32) - list(APPEND source_files test_Win32Util.cpp) + list(APPEND source_files test_bsdmkstemp.cpp test_Win32Util.cpp) endif() add_executable(unittest ${source_files}) @@ -36,6 +36,6 @@ target_link_libraries( unittest PRIVATE standard_settings standard_warnings ccache_lib third_party_lib) -target_include_directories(unittest PRIVATE ${CMAKE_BINARY_DIR} . 
../src) +target_include_directories(unittest PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${ccache_SOURCE_DIR}/src) add_test(NAME unittest COMMAND unittest) diff --git a/unittest/test_Config.cpp b/unittest/test_Config.cpp index 3661c69..fad4ff4 100644 --- a/unittest/test_Config.cpp +++ b/unittest/test_Config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2011-2020 Joel Rosdahl and other contributors +// Copyright (C) 2011-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -17,8 +17,8 @@ // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "../src/Config.hpp" +#include "../src/Sloppiness.hpp" #include "../src/Util.hpp" -#include "../src/ccache.hpp" #include "../src/exceptions.hpp" #include "../src/fmtmacros.hpp" #include "TestUtil.hpp" @@ -48,6 +48,7 @@ TEST_CASE("Config: default values") CHECK(config.compression_level() == 0); CHECK(config.cpp_extension().empty()); CHECK(!config.debug()); + CHECK(config.debug_dir().empty()); CHECK(!config.depend_mode()); CHECK(config.direct_mode()); CHECK(!config.disable()); @@ -375,6 +376,7 @@ TEST_CASE("Config::visit_items") "compression_level = 8\n" "cpp_extension = ce\n" "debug = false\n" + "debug_dir = /dd\n" "depend_mode = true\n" "direct_mode = false\n" "disable = true\n" @@ -431,6 +433,7 @@ TEST_CASE("Config::visit_items") "(test.conf) compression_level = 8", "(test.conf) cpp_extension = ce", "(test.conf) debug = false", + "(test.conf) debug_dir = /dd", "(test.conf) depend_mode = true", "(test.conf) direct_mode = false", "(test.conf) disable = true", diff --git a/unittest/test_Counters.cpp b/unittest/test_Counters.cpp index d2382af..b4d3be3 100644 --- a/unittest/test_Counters.cpp +++ b/unittest/test_Counters.cpp @@ -17,7 +17,7 @@ // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "../src/Counters.hpp" -#include "../src/Statistics.hpp" +#include "../src/Statistic.hpp" #include "TestUtil.hpp" #include "third_party/doctest.h" diff 
--git a/unittest/test_Lockfile.cpp b/unittest/test_Lockfile.cpp index 02e3672..7a01512 100644 --- a/unittest/test_Lockfile.cpp +++ b/unittest/test_Lockfile.cpp @@ -45,6 +45,15 @@ TEST_CASE("Lockfile acquire and release") CHECK(!Stat::lstat("test.lock")); } +TEST_CASE("Lockfile creates missing directories") +{ + TestContext test_context; + + Lockfile lock("a/b/c/test", 1000); + CHECK(lock.acquired()); + CHECK(Stat::lstat("a/b/c/test.lock")); +} + #ifndef _WIN32 TEST_CASE("Lockfile breaking") { diff --git a/unittest/test_Statistics.cpp b/unittest/test_Statistics.cpp index 5d6892c..0e647fb 100644 --- a/unittest/test_Statistics.cpp +++ b/unittest/test_Statistics.cpp @@ -16,6 +16,7 @@ // this program; if not, write to the Free Software Foundation, Inc., 51 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../src/Statistic.hpp" #include "../src/Statistics.hpp" #include "../src/Util.hpp" #include "../src/fmtmacros.hpp" diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp index 917c137..5fd5239 100644 --- a/unittest/test_Util.cpp +++ b/unittest/test_Util.cpp @@ -146,6 +146,16 @@ TEST_CASE("Util::dir_name") CHECK(Util::dir_name("/") == "/"); CHECK(Util::dir_name("/foo") == "/"); CHECK(Util::dir_name("/foo/bar/f.txt") == "/foo/bar"); + +#ifdef _WIN32 + CHECK(Util::dir_name("C:/x/y") == "C:/x"); + CHECK(Util::dir_name("X:/x/y") == "X:/x"); + CHECK(Util::dir_name("C:\\x\\y") == "C:\\x"); + CHECK(Util::dir_name("C:/x") == "C:/"); + CHECK(Util::dir_name("C:\\x") == "C:\\"); + CHECK(Util::dir_name("C:/") == "C:/"); + CHECK(Util::dir_name("C:\\") == "C:\\"); +#endif } TEST_CASE("Util::strip_ansi_csi_seqs") @@ -366,43 +376,40 @@ TEST_CASE("Util::get_level_1_files") Util::write_file("0/1/file_c", "12"); Util::write_file("0/f/c/file_d", "123"); - std::vector<std::shared_ptr<CacheFile>> files; auto null_receiver = [](double) {}; SUBCASE("nonexistent subdirectory") { - Util::get_level_1_files("2", null_receiver, files); + const auto files = 
Util::get_level_1_files("2", null_receiver); CHECK(files.empty()); } SUBCASE("empty subdirectory") { - Util::get_level_1_files("e", null_receiver, files); + const auto files = Util::get_level_1_files("e", null_receiver); CHECK(files.empty()); } SUBCASE("simple case") { - Util::get_level_1_files("0", null_receiver, files); + auto files = Util::get_level_1_files("0", null_receiver); REQUIRE(files.size() == 4); // Files within a level are in arbitrary order, sort them to be able to // verify them. - std::sort(files.begin(), - files.end(), - [](const std::shared_ptr<CacheFile>& f1, - const std::shared_ptr<CacheFile>& f2) { - return f1->path() < f2->path(); - }); - - CHECK(files[0]->path() == os_path("0/1/file_b")); - CHECK(files[0]->lstat().size() == 1); - CHECK(files[1]->path() == os_path("0/1/file_c")); - CHECK(files[1]->lstat().size() == 2); - CHECK(files[2]->path() == os_path("0/f/c/file_d")); - CHECK(files[2]->lstat().size() == 3); - CHECK(files[3]->path() == os_path("0/file_a")); - CHECK(files[3]->lstat().size() == 0); + std::sort( + files.begin(), files.end(), [](const CacheFile& f1, const CacheFile& f2) { + return f1.path() < f2.path(); + }); + + CHECK(files[0].path() == os_path("0/1/file_b")); + CHECK(files[0].lstat().size() == 1); + CHECK(files[1].path() == os_path("0/1/file_c")); + CHECK(files[1].lstat().size() == 2); + CHECK(files[2].path() == os_path("0/f/c/file_d")); + CHECK(files[2].lstat().size() == 3); + CHECK(files[3].path() == os_path("0/file_a")); + CHECK(files[3].lstat().size() == 0); } } @@ -443,6 +450,31 @@ TEST_CASE("Util::get_path_in_cache") == "/zz/ccache/A/B/C/D/EF.suffix"); } +TEST_CASE("Util::hard_link") +{ + TestContext test_context; + + SUBCASE("Link file to nonexistent destination") + { + Util::write_file("old", "content"); + CHECK_NOTHROW(Util::hard_link("old", "new")); + CHECK(Util::read_file("new") == "content"); + } + + SUBCASE("Link file to existing destination") + { + Util::write_file("old", "content"); + Util::write_file("new", 
"other content"); + CHECK_NOTHROW(Util::hard_link("old", "new")); + CHECK(Util::read_file("new") == "content"); + } + + SUBCASE("Link nonexistent file") + { + CHECK_THROWS_AS(Util::hard_link("old", "new"), Error); + } +} + TEST_CASE("Util::int_to_big_endian") { uint8_t bytes[8]; @@ -529,6 +561,69 @@ TEST_CASE("Util::is_dir_separator") #endif } +TEST_CASE("Util::make_relative_path") +{ + using Util::make_relative_path; + + const TestContext test_context; + + const std::string cwd = Util::get_actual_cwd(); + const std::string actual_cwd = FMT("{}/d", cwd); +#ifdef _WIN32 + const std::string apparent_cwd = actual_cwd; +#else + const std::string apparent_cwd = FMT("{}/s", cwd); +#endif + + REQUIRE(Util::create_dir("d")); +#ifndef _WIN32 + REQUIRE(symlink("d", "s") == 0); +#endif + REQUIRE(chdir("d") == 0); + Util::setenv("PWD", apparent_cwd); + + SUBCASE("No base directory") + { + CHECK(make_relative_path("", "/a", "/a", "/a/x") == "/a/x"); + } + + SUBCASE("Path matches neither actual nor apparent CWD") + { +#ifdef _WIN32 + CHECK(make_relative_path("C:/", "C:/a", "C:/b", "C:/x") == "C:/x"); +#else + CHECK(make_relative_path("/", "/a", "/b", "/x") == "/x"); +#endif + } + + SUBCASE("Match of actual CWD") + { +#ifdef _WIN32 + CHECK( + make_relative_path( + actual_cwd.substr(0, 3), actual_cwd, apparent_cwd, actual_cwd + "/x") + == "./x"); +#else + CHECK(make_relative_path("/", actual_cwd, apparent_cwd, actual_cwd + "/x") + == "./x"); +#endif + } + +#ifndef _WIN32 + SUBCASE("Match of apparent CWD") + { + CHECK(make_relative_path("/", actual_cwd, apparent_cwd, apparent_cwd + "/x") + == "./x"); + } + + SUBCASE("Match if using resolved (using realpath(3)) path") + { + CHECK(make_relative_path("/", actual_cwd, actual_cwd, apparent_cwd + "/x") + == "./x"); + } +#endif +} + TEST_CASE("Util::matches_dir_prefix_or_file") { CHECK(!Util::matches_dir_prefix_or_file("", "")); diff --git a/unittest/test_argprocessing.cpp b/unittest/test_argprocessing.cpp index 7e2cbca..7db1ad3 100644 
--- a/unittest/test_argprocessing.cpp +++ b/unittest/test_argprocessing.cpp @@ -19,7 +19,7 @@ #include "../src/Args.hpp" #include "../src/Config.hpp" #include "../src/Context.hpp" -#include "../src/Statistics.hpp" +#include "../src/Statistic.hpp" #include "../src/Util.hpp" #include "../src/fmtmacros.hpp" #include "TestUtil.hpp" diff --git a/unittest/test_bsdmkstemp.cpp b/unittest/test_bsdmkstemp.cpp new file mode 100644 index 0000000..021c73d --- /dev/null +++ b/unittest/test_bsdmkstemp.cpp @@ -0,0 +1,206 @@ +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "../src/Fd.hpp" +#include "../src/Finalizer.hpp" +#include "TestUtil.hpp" + +#include "third_party/doctest.h" +#include "third_party/win32/mktemp.h" + +#include <algorithm> +#include <memory> +#include <ostream> +#include <sddl.h> +#include <utility> + +using TestUtil::TestContext; + +namespace { + +class ScopedHANDLE +{ +public: + ScopedHANDLE() = default; + + explicit ScopedHANDLE(HANDLE handle) : m_handle(handle) + { + } + + ScopedHANDLE(ScopedHANDLE&& other) : ScopedHANDLE(other.release()) + { + } + + ~ScopedHANDLE() + { + if (m_handle != INVALID_HANDLE_VALUE) { + CloseHandle(m_handle); + } + } + + ScopedHANDLE& + operator=(ScopedHANDLE rhs) + { + std::swap(m_handle, rhs.m_handle); + return *this; + } + + explicit operator bool() const + { + return m_handle != INVALID_HANDLE_VALUE; + } + + HANDLE + get() const + { + return m_handle; + } + + HANDLE + release() + { + HANDLE handle = m_handle; + m_handle = INVALID_HANDLE_VALUE; + return handle; + } + +private: + HANDLE m_handle = INVALID_HANDLE_VALUE; +}; + +} // namespace + +TEST_SUITE_BEGIN("bsd_mkstemp"); + +TEST_CASE("bsd_mkstemp") +{ + TestContext test_context; + + static uint16_t rand_iter; + rand_iter = 0; + + bsd_mkstemp_set_random_source([](void* buf, size_t nbytes) { + std::fill_n( + static_cast<uint16_t*>(buf), nbytes / sizeof(uint16_t), rand_iter); + ++rand_iter; + }); + + Finalizer reset_random_source([] { bsd_mkstemp_set_random_source(nullptr); }); + + SUBCASE("successful") + { + std::string path = "XXXXXX"; + CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno); + CHECK(path == "AAAAAA"); + } + + SUBCASE("existing file") + { + CHECK_MESSAGE(ScopedHANDLE(CreateFileA("AAAAAA", + GENERIC_READ | GENERIC_WRITE, + 0, + nullptr, + CREATE_NEW, + FILE_ATTRIBUTE_NORMAL, + 
nullptr)), + "errno=" << errno); + + std::string path = "XXXXXX"; + CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno); + CHECK(path == "BBBBBB"); + } + + SUBCASE("existing file, pending delete") + { + ScopedHANDLE h; + CHECK_MESSAGE( + (h = ScopedHANDLE(CreateFileA("AAAAAA", + GENERIC_READ | GENERIC_WRITE | DELETE, + 0, + nullptr, + CREATE_NEW, + FILE_ATTRIBUTE_NORMAL, + nullptr))), + "errno=" << errno); + + // Mark file as deleted. This puts it into a "pending delete" state that + // will persist until the handle is closed. + FILE_DISPOSITION_INFO info{}; + info.DeleteFile = TRUE; + CHECK_MESSAGE(SetFileInformationByHandle( + h.get(), FileDispositionInfo, &info, sizeof(info)), + "errno=" << errno); + + std::string path = "XXXXXX"; + CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno); + CHECK(path == "BBBBBB"); + } + + SUBCASE("existing directory") + { + CHECK_MESSAGE(CreateDirectoryA("AAAAAA", nullptr), "errno=" << errno); + + std::string path = "XXXXXX"; + CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno); + CHECK(path == "BBBBBB"); + } + + SUBCASE("permission denied") + { + auto make_ACL = [](const char* acl_string) { + PSECURITY_DESCRIPTOR desc = nullptr; + ConvertStringSecurityDescriptorToSecurityDescriptorA( + acl_string, SDDL_REVISION_1, &desc, nullptr); + return std::shared_ptr<SECURITY_DESCRIPTOR>( + static_cast<SECURITY_DESCRIPTOR*>(desc), &LocalFree); + }; + + // Create a directory with a contrived ACL that denies creation of new files + // and directories to the "Everybody" (WD) group. + std::shared_ptr<SECURITY_DESCRIPTOR> desc; + CHECK_MESSAGE((desc = make_ACL("D:(D;;DCLCRPCR;;;WD)(A;;FA;;;WD)")), + "errno=" << errno); + + SECURITY_ATTRIBUTES attrs{}; + attrs.nLength = sizeof(attrs); + attrs.lpSecurityDescriptor = desc.get(); + CHECK_MESSAGE(CreateDirectoryA("my_readonly_dir", &attrs), + "errno=" << errno); + + // Sanity check that we cannot write to this directory. (E.g. 
Wine doesn't + // appear to emulate Windows ACLs properly when run under root.) + bool broken_acls = static_cast<bool>(ScopedHANDLE( + CreateFileA("my_readonly_dir/.writable", + GENERIC_WRITE, + 0, + nullptr, + CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, + nullptr))); + + if (!broken_acls) { + std::string path = "my_readonly_dir/XXXXXX"; + CHECK(!Fd(bsd_mkstemp(&path[0]))); + CHECK(errno == EACCES); + } else { + MESSAGE("ACLs do not appear to function properly on this filesystem"); + } + } +} + +TEST_SUITE_END(); diff --git a/unittest/test_ccache.cpp b/unittest/test_ccache.cpp index e6cd6eb..cd59588 100644 --- a/unittest/test_ccache.cpp +++ b/unittest/test_ccache.cpp @@ -17,6 +17,7 @@ // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "../src/Context.hpp" +#include "../src/Sloppiness.hpp" #include "../src/ccache.hpp" #include "../src/fmtmacros.hpp" #include "TestUtil.hpp" |