diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:59:27 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:59:28 +0900 |
commit | 9408cea9ad30ce73c37268ab8e86ef2cededfadf (patch) | |
tree | 6dce7f99274a2d6632a63ec5484fe3bc908b2a08 | |
parent | c04192c9e5fef3601690a75a4d3dd197c79aaf5b (diff) | |
download | re2-9408cea9ad30ce73c37268ab8e86ef2cededfadf.tar.gz re2-9408cea9ad30ce73c37268ab8e86ef2cededfadf.tar.bz2 re2-9408cea9ad30ce73c37268ab8e86ef2cededfadf.zip |
Imported Upstream version 20160901 (tag: upstream/20160901)
Change-Id: I4e841b115bbb656c52fffa7e8af079e27db16184
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
97 files changed, 2302 insertions, 6901 deletions
@@ -11,6 +11,7 @@ exports_files(["LICENSE"]) cc_library( name = "re2", srcs = [ + "re2/bitmap256.h", "re2/bitstate.cc", "re2/compile.cc", "re2/dfa.cc", @@ -38,21 +39,18 @@ cc_library( "re2/unicode_groups.cc", "re2/unicode_groups.h", "re2/walker-inl.h", - "util/bitmap.h", "util/flags.h", - "util/hash.cc", "util/logging.cc", "util/logging.h", + "util/mix.h", "util/mutex.h", "util/rune.cc", "util/sparse_array.h", "util/sparse_set.h", - "util/stringprintf.cc", "util/strutil.cc", + "util/strutil.h", "util/utf.h", "util/util.h", - "util/valgrind.cc", - "util/valgrind.h", ], hdrs = [ "re2/filtered_re2.h", @@ -77,8 +75,6 @@ cc_library( "re2/testing/string_generator.cc", "re2/testing/tester.cc", "util/pcre.cc", - "util/random.cc", - "util/thread.cc", ], hdrs = [ "re2/testing/exhaustive_tester.h", @@ -87,9 +83,7 @@ cc_library( "re2/testing/tester.h", "util/benchmark.h", "util/pcre.h", - "util/random.h", "util/test.h", - "util/thread.h", ], deps = [":re2"], ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66cfdc6..bf70d59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,10 +2,12 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -# Old enough to support Ubuntu Precise. -cmake_minimum_required(VERSION 2.8.7) +# Old enough to support Ubuntu Trusty. 
+cmake_minimum_required(VERSION 2.8.12) project(RE2 CXX) +include(CTest) + option(BUILD_SHARED_LIBS "build shared libraries" OFF) option(USEPCRE "use PCRE in tests and benchmarks" OFF) @@ -24,10 +26,9 @@ endif() if(WIN32) add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX) - set(THREADING threadwin) + add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS) elseif(UNIX) add_compile_options(-pthread) - set(THREADING thread) list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread) endif() @@ -38,7 +39,7 @@ endif() include_directories(${CMAKE_SOURCE_DIR}) -set(RE2_LIBRARY_SOURCES +set(RE2_SOURCES re2/bitstate.cc re2/compile.cc re2/dfa.cc @@ -59,17 +60,14 @@ set(RE2_LIBRARY_SOURCES re2/tostring.cc re2/unicode_casefold.cc re2/unicode_groups.cc - util/hash.cc util/logging.cc util/rune.cc - util/stringprintf.cc util/strutil.cc - util/valgrind.cc ) -add_library(re2 ${RE2_LIBRARY_SOURCES}) +add_library(re2 ${RE2_SOURCES}) -set(TEST_LIBRARY_SOURCES +set(TESTING_SOURCES re2/testing/backtrack.cc re2/testing/dump.cc re2/testing/exhaustive_tester.cc @@ -78,12 +76,9 @@ set(TEST_LIBRARY_SOURCES re2/testing/string_generator.cc re2/testing/tester.cc util/pcre.cc - util/random.cc - util/${THREADING}.cc ) -add_library(test STATIC ${TEST_LIBRARY_SOURCES} util/test.cc) -add_library(benchmark STATIC ${TEST_LIBRARY_SOURCES} util/benchmark.cc) +add_library(testing STATIC ${TESTING_SOURCES}) set(TEST_TARGETS charclass_test @@ -114,11 +109,22 @@ set(BENCHMARK_TARGETS ) foreach(target ${TEST_TARGETS}) - add_executable(${target} re2/testing/${target}.cc) - target_link_libraries(${target} test re2 ${EXTRA_TARGET_LINK_LIBRARIES}) + add_executable(${target} re2/testing/${target}.cc util/test.cc) + target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES}) + add_test(NAME ${target} COMMAND ${target}) endforeach(target) foreach(target ${BENCHMARK_TARGETS}) - add_executable(${target} re2/testing/${target}.cc) - target_link_libraries(${target} benchmark re2 
${EXTRA_TARGET_LINK_LIBRARIES}) + add_executable(${target} re2/testing/${target}.cc util/benchmark.cc) + target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES}) endforeach(target) + +set(RE2_HEADERS + re2/filtered_re2.h + re2/re2.h + re2/set.h + re2/stringpiece.h + ) + +install(FILES ${RE2_HEADERS} DESTINATION include/re2) +install(TARGETS re2 ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) @@ -78,19 +78,18 @@ INSTALL_HFILES=\ HFILES=\ util/benchmark.h\ - util/bitmap.h\ util/flags.h\ util/logging.h\ + util/mix.h\ util/mutex.h\ util/pcre.h\ - util/random.h\ util/sparse_array.h\ util/sparse_set.h\ + util/strutil.h\ util/test.h\ - util/thread.h\ util/utf.h\ util/util.h\ - util/valgrind.h\ + re2/bitmap256.h\ re2/filtered_re2.h\ re2/prefilter.h\ re2/prefilter_tree.h\ @@ -108,12 +107,9 @@ HFILES=\ re2/walker-inl.h\ OFILES=\ - obj/util/hash.o\ obj/util/logging.o\ obj/util/rune.o\ - obj/util/stringprintf.o\ obj/util/strutil.o\ - obj/util/valgrind.o\ obj/re2/bitstate.o\ obj/re2/compile.o\ obj/re2/dfa.o\ @@ -137,8 +133,6 @@ OFILES=\ TESTOFILES=\ obj/util/pcre.o\ - obj/util/random.o\ - obj/util/thread.o\ obj/re2/testing/backtrack.o\ obj/re2/testing/dump.o\ obj/re2/testing/exhaustive_tester.o\ @@ -34,4 +34,5 @@ A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM. An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM. A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN. A Python wrapper is at https://github.com/facebook/pyre2/. +An R wrapper is at https://github.com/qinwf/re2r/. A Ruby wrapper is at https://github.com/mudge/re2/. 
diff --git a/kokoro/ubuntu/continuous-bazel.cfg b/kokoro/ubuntu/continuous-bazel.cfg new file mode 100644 index 0000000..5a48a4f --- /dev/null +++ b/kokoro/ubuntu/continuous-bazel.cfg @@ -0,0 +1 @@ +build_file: "re2/kokoro/ubuntu/continuous-bazel.sh" diff --git a/kokoro/ubuntu/continuous-bazel.sh b/kokoro/ubuntu/continuous-bazel.sh new file mode 100755 index 0000000..6f25982 --- /dev/null +++ b/kokoro/ubuntu/continuous-bazel.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -eux + +cd git/re2 + +bazel clean +bazel build --compilation_mode=dbg -- //... +bazel test --compilation_mode=dbg --test_output=errors -- //... \ + -//:dfa_test \ + -//:exhaustive1_test \ + -//:exhaustive2_test \ + -//:exhaustive3_test \ + -//:exhaustive_test \ + -//:random_test + +bazel clean +bazel build --compilation_mode=opt -- //... +bazel test --compilation_mode=opt --test_output=errors -- //... \ + -//:dfa_test \ + -//:exhaustive1_test \ + -//:exhaustive2_test \ + -//:exhaustive3_test \ + -//:exhaustive_test \ + -//:random_test + +exit 0 diff --git a/kokoro/windows/continuous-cmake.bat b/kokoro/windows/continuous-cmake.bat new file mode 100755 index 0000000..8f0f293 --- /dev/null +++ b/kokoro/windows/continuous-cmake.bat @@ -0,0 +1,11 @@ +CD git/re2 || EXIT /B 1 + +cmake -D CMAKE_BUILD_TYPE=Debug -A x64 . || EXIT /B 1 +cmake --build . --config Debug --clean-first || EXIT /B 1 +ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1 + +cmake -D CMAKE_BUILD_TYPE=Release -A x64 . || EXIT /B 1 +cmake --build . 
--config Release --clean-first || EXIT /B 1 +ctest -C Release --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1 + +EXIT /B 0 diff --git a/kokoro/windows/continuous-cmake.cfg b/kokoro/windows/continuous-cmake.cfg new file mode 100644 index 0000000..cc6d4aa --- /dev/null +++ b/kokoro/windows/continuous-cmake.cfg @@ -0,0 +1 @@ +build_file: "re2/kokoro/windows/continuous-cmake.bat" diff --git a/util/bitmap.h b/re2/bitmap256.h index 8a93d81..1abae99 100644 --- a/util/bitmap.h +++ b/re2/bitmap256.h @@ -2,13 +2,17 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef UTIL_BITMAP_H_ -#define UTIL_BITMAP_H_ +#ifndef RE2_BITMAP256_H_ +#define RE2_BITMAP256_H_ #ifdef _MSC_VER #include <intrin.h> #endif +#include <stdint.h> +#include <string.h> + #include "util/util.h" +#include "util/logging.h" namespace re2 { @@ -40,21 +44,38 @@ class Bitmap256 { private: // Finds the least significant non-zero bit in n. - static int FindLSBSet(uint64 n) { + static int FindLSBSet(uint64_t n) { DCHECK_NE(n, 0); #if defined(__GNUC__) return __builtin_ctzll(n); -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && defined(_M_X64) unsigned long c; _BitScanForward64(&c, n); return static_cast<int>(c); +#elif defined(_MSC_VER) && defined(_M_IX86) + unsigned long c; + if (static_cast<uint32_t>(n) != 0) { + _BitScanForward(&c, static_cast<uint32_t>(n)); + return static_cast<int>(c); + } else { + _BitScanForward(&c, static_cast<uint32_t>(n >> 32)); + return static_cast<int>(c) + 32; + } #else -#error "bit scan forward not implemented" + int c = 63; + for (int shift = 1 << 5; shift != 0; shift >>= 1) { + uint64_t word = n << shift; + if (word != 0) { + n = word; + c -= shift; + } + } + return c; #endif } - uint64 words_[4]; + uint64_t words_[4]; }; int Bitmap256::FindNextSetBit(int c) const { @@ -63,7 +84,7 @@ int Bitmap256::FindNextSetBit(int c) const { // Check the word that contains the bit. Mask out any lower bits. 
int i = c / 64; - uint64 word = words_[i] & (~0ULL << (c % 64)); + uint64_t word = words_[i] & (~0ULL << (c % 64)); if (word != 0) return (i * 64) + FindLSBSet(word); @@ -89,4 +110,4 @@ int Bitmap256::FindNextSetBit(int c) const { } // namespace re2 -#endif // UTIL_BITMAP_H_ +#endif // RE2_BITMAP256_H_ diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 775fbec..4552d17 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -17,6 +17,11 @@ // SearchBitState is a fast replacement for the NFA code on small // regexps and texts when SearchOnePass cannot be used. +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "util/logging.h" #include "re2/prog.h" #include "re2/regexp.h" @@ -60,8 +65,8 @@ class BitState { int ncap_; static const int VisitedBits = 32; - uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked - int nvisited_; // # of words in bitmap + uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked + size_t nvisited_; // # of words in bitmap Job *job_; // stack of text positions to explore int njob_; @@ -283,8 +288,9 @@ bool BitState::TrySearch(int id0, const char* p0) { if (submatch_[0].data() == NULL || (longest_ && p > submatch_[0].end())) { for (int i = 0; i < nsubmatch_; i++) - submatch_[i].set(cap_[2*i], - static_cast<int>(cap_[2*i+1] - cap_[2*i])); + submatch_[i] = + StringPiece(cap_[2 * i], + static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i])); } // If going for first match, we're done. @@ -326,7 +332,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // Allocate scratch space. 
nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits; - visited_ = new uint32[nvisited_]; + visited_ = new uint32_t[nvisited_]; memset(visited_, 0, nvisited_*sizeof visited_[0]); // VLOG(0) << "nvisited_ = " << nvisited_; diff --git a/re2/compile.cc b/re2/compile.cc index 117679f..a99c909 100644 --- a/re2/compile.cc +++ b/re2/compile.cc @@ -8,6 +8,14 @@ // This file's external interface is just Regexp::CompileToProg. // The Compiler class defined in this file is private. +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <map> +#include <utility> + +#include "util/logging.h" +#include "util/utf.h" #include "re2/prog.h" #include "re2/re2.h" #include "re2/regexp.h" @@ -28,14 +36,14 @@ namespace re2 { // is always the fail instruction, which never appears on a list. struct PatchList { - uint32 p; + uint32_t p; // Returns patch list containing just p. - static PatchList Mk(uint32 p); + static PatchList Mk(uint32_t p); // Patches all the entries on l to have value v. // Caller must not ever use patch list again. - static void Patch(Prog::Inst *inst0, PatchList l, uint32 v); + static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v); // Deref returns the next pointer pointed at by p. static PatchList Deref(Prog::Inst *inst0, PatchList l); @@ -47,7 +55,7 @@ struct PatchList { static PatchList nullPatchList = { 0 }; // Returns patch list containing just p. -PatchList PatchList::Mk(uint32 p) { +PatchList PatchList::Mk(uint32_t p) { PatchList l; l.p = p; return l; @@ -64,7 +72,7 @@ PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) { } // Patches all the entries on l to have value v. -void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32 val) { +void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) { while (l.p != 0) { Prog::Inst* ip = &inst0[l.p>>1]; if (l.p&1) { @@ -103,11 +111,11 @@ PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) { // Compiled program fragment. 
struct Frag { - uint32 begin; + uint32_t begin; PatchList end; Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector - Frag(uint32 begin, PatchList end) : begin(begin), end(end) {} + Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {} }; // Input encodings. @@ -125,7 +133,7 @@ class Compiler : public Regexp::Walker<Frag> { // Caller is responsible for deleting Prog when finished with it. // If reversed is true, compiles for walking over the input // string backward (reverses all concatenations). - static Prog *Compile(Regexp* re, bool reversed, int64 max_mem); + static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem); // Compiles alternation of all the re to a new Prog. // Each re has a match with an id equal to its index in the vector. @@ -162,7 +170,7 @@ class Compiler : public Regexp::Walker<Frag> { Frag NoMatch(); // Returns a fragment that matches the empty string. - Frag Match(int32 id); + Frag Match(int32_t id); // Returns a no-op fragment. Frag Nop(); @@ -190,8 +198,8 @@ class Compiler : public Regexp::Walker<Frag> { void Add_80_10ffff(); // New suffix that matches the byte range lo-hi, then goes to next. - int UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next); - int CachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next); + int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); + int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); // Returns true iff the suffix is cached. bool IsCachedRuneByteSuffix(int id); @@ -218,7 +226,7 @@ class Compiler : public Regexp::Walker<Frag> { // Single rune. Frag Literal(Rune r, bool foldcase); - void Setup(Regexp::ParseFlags, int64, RE2::Anchor); + void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor); Prog* Finish(); // Returns .* where dot = any byte @@ -236,14 +244,15 @@ class Compiler : public Regexp::Walker<Frag> { int inst_len_; // Number of instructions used. int inst_cap_; // Number of instructions allocated. 
- int64 max_mem_; // Total memory budget. + int64_t max_mem_; // Total memory budget. - map<uint64, int> rune_cache_; + std::map<uint64_t, int> rune_cache_; Frag rune_range_; RE2::Anchor anchor_; // anchor mode for RE2::Set - DISALLOW_COPY_AND_ASSIGN(Compiler); + Compiler(const Compiler&) = delete; + Compiler& operator=(const Compiler&) = delete; }; Compiler::Compiler() { @@ -409,7 +418,7 @@ Frag Compiler::Nop() { } // Returns a fragment that signals a match. -Frag Compiler::Match(int32 match_id) { +Frag Compiler::Match(int32_t match_id) { int id = AllocInst(1); if (id < 0) return NoMatch(); @@ -465,7 +474,7 @@ void Compiler::BeginRange() { rune_range_.end = nullPatchList; } -int Compiler::UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, +int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next) { Frag f = ByteRange(lo, hi, foldcase); if (next != 0) { @@ -476,17 +485,18 @@ int Compiler::UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, return f.begin; } -static uint64 MakeRuneCacheKey(uint8 lo, uint8 hi, bool foldcase, int next) { - return (uint64)next << 17 | - (uint64)lo << 9 | - (uint64)hi << 1 | - (uint64)foldcase; +static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, + int next) { + return (uint64_t)next << 17 | + (uint64_t)lo << 9 | + (uint64_t)hi << 1 | + (uint64_t)foldcase; } -int Compiler::CachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, +int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next) { - uint64 key = MakeRuneCacheKey(lo, hi, foldcase, next); - map<uint64, int>::const_iterator it = rune_cache_.find(key); + uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); + std::map<uint64_t, int>::const_iterator it = rune_cache_.find(key); if (it != rune_cache_.end()) return it->second; int id = UncachedRuneByteSuffix(lo, hi, foldcase, next); @@ -495,12 +505,12 @@ int Compiler::CachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, } bool 
Compiler::IsCachedRuneByteSuffix(int id) { - uint8 lo = inst_[id].lo_; - uint8 hi = inst_[id].hi_; + uint8_t lo = inst_[id].lo_; + uint8_t hi = inst_[id].hi_; bool foldcase = inst_[id].foldcase() != 0; int next = inst_[id].out(); - uint64 key = MakeRuneCacheKey(lo, hi, foldcase, next); + uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); return rune_cache_.find(key) != rune_cache_.end(); } @@ -652,8 +662,8 @@ void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) { return; if (hi > 0xFF) hi = 0xFF; - AddSuffix(UncachedRuneByteSuffix(static_cast<uint8>(lo), - static_cast<uint8>(hi), foldcase, 0)); + AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), + static_cast<uint8_t>(hi), foldcase, 0)); } // Table describing how to make a UTF-8 matching machine @@ -694,8 +704,8 @@ void Compiler::Add_80_10ffff() { int next = 0; if (p.next >= 0) next = inst[p.next]; - inst[i] = UncachedRuneByteSuffix(static_cast<uint8>(p.lo), - static_cast<uint8>(p.hi), false, next); + inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo), + static_cast<uint8_t>(p.hi), false, next); if ((p.lo & 0xC0) != 0x80) AddSuffix(inst[i]); } @@ -724,14 +734,14 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { // ASCII range is always a special case. if (hi < Runeself) { - AddSuffix(UncachedRuneByteSuffix(static_cast<uint8>(lo), - static_cast<uint8>(hi), foldcase, 0)); + AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), + static_cast<uint8_t>(hi), foldcase, 0)); return; } // Split range into sections that agree on leading bytes. for (int i = 1; i < UTFmax; i++) { - uint m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence + uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence if ((lo & ~m) != (hi & ~m)) { if ((lo & m) != 0) { AddRuneRangeUTF8(lo, lo|m, foldcase); @@ -747,7 +757,7 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { } // Finally. Generate byte matching equivalent for lo-hi. 
- uint8 ulo[UTFmax], uhi[UTFmax]; + uint8_t ulo[UTFmax], uhi[UTFmax]; int n = runetochar(reinterpret_cast<char*>(ulo), &lo); int m = runetochar(reinterpret_cast<char*>(uhi), &hi); (void)m; // USED(m) @@ -835,11 +845,11 @@ Frag Compiler::Literal(Rune r, bool foldcase) { case kEncodingUTF8: { if (r < Runeself) // Make common case fast. return ByteRange(r, r, foldcase); - uint8 buf[UTFmax]; + uint8_t buf[UTFmax]; int n = runetochar(reinterpret_cast<char*>(buf), &r); - Frag f = ByteRange((uint8)buf[0], buf[0], false); + Frag f = ByteRange((uint8_t)buf[0], buf[0], false); for (int i = 1; i < n; i++) - f = Cat(f, ByteRange((uint8)buf[i], buf[i], false)); + f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false)); return f; } } @@ -1087,7 +1097,7 @@ static bool IsAnchorEnd(Regexp** pre, int depth) { return false; } -void Compiler::Setup(Regexp::ParseFlags flags, int64 max_mem, +void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor) { prog_->set_flags(flags); @@ -1096,11 +1106,11 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64 max_mem, max_mem_ = max_mem; if (max_mem <= 0) { max_inst_ = 100000; // more than enough - } else if (max_mem <= static_cast<int64>(sizeof(Prog))) { + } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) { // No room for anything. max_inst_ = 0; } else { - int64 m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); + int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); // Limit instruction count so that inst->id() fits nicely in an int. // SparseArray also assumes that the indices (inst->id()) are ints. // The call to WalkExponential uses 2*max_inst_ below, @@ -1127,7 +1137,7 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64 max_mem, // If reversed is true, compiles a program that expects // to run over the input string backward (reverses all concatenations). // The reversed flag is also recorded in the returned program. 
-Prog* Compiler::Compile(Regexp* re, bool reversed, int64 max_mem) { +Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) { Compiler c; c.Setup(re->parse_flags(), max_mem, RE2::ANCHOR_BOTH /* unused */); @@ -1201,7 +1211,7 @@ Prog* Compiler::Finish() { if (max_mem_ <= 0) { prog_->set_dfa_mem(1<<20); } else { - int64 m = max_mem_ - sizeof(Prog) - inst_len_*sizeof(Prog::Inst); + int64_t m = max_mem_ - sizeof(Prog) - inst_len_*sizeof(Prog::Inst); if (m < 0) m = 0; prog_->set_dfa_mem(m); @@ -1213,11 +1223,11 @@ Prog* Compiler::Finish() { } // Converts Regexp to Prog. -Prog* Regexp::CompileToProg(int64 max_mem) { +Prog* Regexp::CompileToProg(int64_t max_mem) { return Compiler::Compile(this, false, max_mem); } -Prog* Regexp::CompileToReverseProg(int64 max_mem) { +Prog* Regexp::CompileToReverseProg(int64_t max_mem) { return Compiler::Compile(this, true, max_mem); } @@ -21,8 +21,26 @@ // // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent. +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <algorithm> +#include <atomic> +#include <map> +#include <new> +#include <string> +#include <unordered_set> +#include <utility> +#include <vector> + #include "util/flags.h" +#include "util/logging.h" +#include "util/mix.h" +#include "util/mutex.h" #include "util/sparse_set.h" +#include "util/strutil.h" #include "re2/prog.h" #include "re2/stringpiece.h" @@ -30,6 +48,11 @@ DEFINE_bool(re2_dfa_bail_when_slow, true, "Whether the RE2 DFA should bail out early " "if the NFA would be faster (for testing)."); +// Silence "zero-sized array in struct/union" warning for DFA::State::next_. +#ifdef _MSC_VER +#pragma warning(disable: 4200) +#endif + namespace re2 { #if !defined(__linux__) /* only Linux seems to have memrchr */ @@ -53,7 +76,7 @@ static const bool DebugDFA = false; // the comments in the sections that follow the DFA definition. 
class DFA { public: - DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem); + DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem); ~DFA(); bool ok() const { return !init_failed_; } Prog::MatchKind kind() { return kind_; } @@ -73,7 +96,7 @@ class DFA { // memory), it sets *failed and returns false. bool Search(const StringPiece& text, const StringPiece& context, bool anchored, bool want_earliest_match, bool run_forward, - bool* failed, const char** ep, vector<int>* matches); + bool* failed, const char** ep, std::vector<int>* matches); // Builds out all states for the entire DFA. FOR TESTING ONLY // Returns number of states. @@ -85,20 +108,20 @@ class DFA { // These data structures are logically private, but C++ makes it too // difficult to mark them as such. - class Workq; class RWLocker; class StateSaver; + class Workq; // A single DFA state. The DFA is represented as a graph of these // States, linked by the next_ pointers. If in state s and reading // byte c, the next state should be s->next_[c]. struct State { inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; } - void SaveMatch(vector<int>* v); + void SaveMatch(std::vector<int>* v); int* inst_; // Instruction pointers in the state. int ninst_; // # of inst_ pointers. - uint flag_; // Empty string bitfield flags in effect on the way + uint32_t flag_; // Empty string bitfield flags in effect on the way // into this state, along with kFlagMatch if this // is a matching state. 
std::atomic<State*> next_[]; // Outgoing arrows from State, @@ -116,35 +139,33 @@ class DFA { struct StateHash { size_t operator()(const State* a) const { - if (a == NULL) - return 0; - const char* s = reinterpret_cast<const char*>(a->inst_); - int len = a->ninst_ * sizeof a->inst_[0]; - if (sizeof(size_t) == sizeof(uint32)) - return static_cast<size_t>(Hash32StringWithSeed(s, len, a->flag_)); - else - return static_cast<size_t>(Hash64StringWithSeed(s, len, a->flag_)); + DCHECK(a != NULL); + HashMix mix(a->flag_); + for (int i = 0; i < a->ninst_; i++) + mix.Mix(a->inst_[i]); + mix.Mix(0); + return mix.get(); } }; struct StateEqual { bool operator()(const State* a, const State* b) const { + DCHECK(a != NULL); + DCHECK(b != NULL); if (a == b) return true; - if (a == NULL || b == NULL) + if (a->flag_ != b->flag_) return false; if (a->ninst_ != b->ninst_) return false; - if (a->flag_ != b->flag_) - return false; for (int i = 0; i < a->ninst_; i++) if (a->inst_[i] != b->inst_[i]) return false; - return true; // they're equal + return true; } }; - typedef unordered_set<State*, StateHash, StateEqual> StateSet; + typedef std::unordered_set<State*, StateHash, StateEqual> StateSet; private: // Special "firstbyte" values for a state. (Values >= 0 denote actual bytes.) @@ -176,11 +197,11 @@ class DFA { // Looks up and returns the State corresponding to a Workq. // L >= mutex_ - State* WorkqToCachedState(Workq* q, uint flag); + State* WorkqToCachedState(Workq* q, uint32_t flag); // Looks up and returns a State matching the inst, ninst, and flag. // L >= mutex_ - State* CachedState(int* inst, int ninst, uint flag); + State* CachedState(int* inst, int ninst, uint32_t flag); // Clear the cache entirely. // Must hold cache_mutex_.w or be in destructor. @@ -199,17 +220,17 @@ class DFA { // sets *ismatch to true. 
// L >= mutex_ void RunWorkqOnByte(Workq* q, Workq* nq, - int c, uint flag, bool* ismatch, + int c, uint32_t flag, bool* ismatch, Prog::MatchKind kind); // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. // L >= mutex_ - void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag); + void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint32_t flag); // Adds the instruction id to the Workq, following empty arrows // according to flag. // L >= mutex_ - void AddToQueue(Workq* q, int id, uint flag); + void AddToQueue(Workq* q, int id, uint32_t flag); // For debugging, returns a text representation of State. static string DumpState(State* state); @@ -242,10 +263,11 @@ class DFA { RWLocker *cache_lock; bool failed; // "out" parameter: whether search gave up const char* ep; // "out" parameter: end pointer for match - vector<int>* matches; + std::vector<int>* matches; private: - DISALLOW_COPY_AND_ASSIGN(SearchParams); + SearchParams(const SearchParams&) = delete; + SearchParams& operator=(const SearchParams&) = delete; }; // Before each search, the parameters to Search are analyzed by @@ -262,7 +284,8 @@ class DFA { // false on failure. // cache_mutex_.r <= L < mutex_ bool AnalyzeSearch(SearchParams* params); - bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint flags); + bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, + uint32_t flags); // The generic search loop, inlined to create specialized versions. // cache_mutex_.r <= L < mutex_ @@ -327,16 +350,16 @@ class DFA { // readers. Any State* pointers are only valid while cache_mutex_ // is held. Mutex cache_mutex_; - int64 mem_budget_; // Total memory budget for all States. - int64 state_budget_; // Amount of memory remaining for new States. + int64_t mem_budget_; // Total memory budget for all States. + int64_t state_budget_; // Amount of memory remaining for new States. StateSet state_cache_; // All States computed so far. 
StartInfo start_[kMaxStart]; bool cache_warned_; // have printed to LOG(INFO) about the cache }; -// Shorthand for casting to uint8*. -static inline const uint8* BytePtr(const void* v) { - return reinterpret_cast<const uint8*>(v); +// Shorthand for casting to uint8_t*. +static inline const uint8_t* BytePtr(const void* v) { + return reinterpret_cast<const uint8_t*>(v); } // Work queues @@ -397,10 +420,12 @@ class DFA::Workq : public SparseSet { int maxmark_; // maximum number of marks int nextmark_; // id of next mark bool last_was_mark_; // last inserted was mark - DISALLOW_COPY_AND_ASSIGN(Workq); + + Workq(const Workq&) = delete; + Workq& operator=(const Workq&) = delete; }; -DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) +DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) : prog_(prog), kind_(kind), init_failed_(false), @@ -426,8 +451,8 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) (sizeof(int)+sizeof(int)) * 2; // q0, q1 mem_budget_ -= nastack_ * sizeof(int); // astack if (mem_budget_ < 0) { - LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld", - prog_->size(), max_mem); + LOG(INFO) << "DFA out of memory: prog size " << prog_->size() + << " mem " << max_mem; init_failed_ = true; return; } @@ -441,11 +466,11 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) // Note that a state stores list heads only, so we use the program // list count for the upper bound, not the program size. 
int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot - int64 one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) + - (prog_->list_count()+nmark)*sizeof(int); + int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) + + (prog_->list_count()+nmark)*sizeof(int); if (state_budget_ < 20*one_state) { - LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld", - prog_->size(), max_mem); + LOG(INFO) << "DFA out of memory: prog size " << prog_->size() + << " mem " << max_mem; init_failed_ = true; return; } @@ -574,9 +599,8 @@ string DFA::DumpState(State* state) { // Looks in the State cache for a State matching q, flag. // If one is found, returns it. If one is not found, allocates one, // inserts it in the cache, and returns it. -DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { - if (DEBUG_MODE) - mutex_.AssertHeld(); +DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) { + //mutex_.AssertHeld(); // Construct array of instruction ids for the new state. // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: @@ -584,9 +608,9 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { // RunWorkqOnEmptyString or RunWorkqOnByte. 
int* inst = new int[q->size()]; int n = 0; - uint needflags = 0; // flags needed by kInstEmptyWidth instructions - bool sawmatch = false; // whether queue contains guaranteed kInstMatch - bool sawmark = false; // whether queue contains a Mark + uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions + bool sawmatch = false; // whether queue contains guaranteed kInstMatch + bool sawmark = false; // whether queue contains a Mark if (DebugDFA) fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); for (Workq::iterator it = q->begin(); it != q->end(); ++it) { @@ -675,7 +699,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { int* markp = ip; while (markp < ep && *markp != Mark) markp++; - sort(ip, markp); + std::sort(ip, markp); if (markp < ep) markp++; ip = markp; @@ -693,9 +717,8 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { // Looks in the State cache for a State matching inst, ninst, flag. // If one is found, returns it. If one is not found, allocates one, // inserts it in the cache, and returns it. -DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) { - if (DEBUG_MODE) - mutex_.AssertHeld(); +DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { + //mutex_.AssertHeld(); // Look in the cache for a pre-existing state. // We have to initialise the struct like this because otherwise @@ -713,9 +736,9 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) { // Must have enough memory for new state. // In addition to what we're going to allocate, - // the state cache hash table seems to incur about 32 bytes per + // the state cache hash table seems to incur about 40 bytes per // State*, empirically. 
- const int kStateCacheOverhead = 32; + const int kStateCacheOverhead = 40; int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) + ninst*sizeof(int); @@ -771,7 +794,7 @@ void DFA::StateToWorkq(State* s, Workq* q) { } // Adds ip to the work queue, following empty arrows according to flag. -void DFA::AddToQueue(Workq* q, int id, uint flag) { +void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { // Use astack_ to hold our stack of instructions yet to process. // It was preallocated as follows: @@ -871,7 +894,7 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) { // and then processing only $. Doing the two-step sequence won't match // ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior // exhibited by existing implementations). -void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint flag) { +void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) { newq->clear(); for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { if (oldq->is_mark(*i)) @@ -886,10 +909,9 @@ void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint flag) { // means to match c$. Sets the bool *ismatch to true if the end of the // regular expression program has been reached (the regexp has matched). void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, - int c, uint flag, bool* ismatch, + int c, uint32_t flag, bool* ismatch, Prog::MatchKind kind) { - if (DEBUG_MODE) - mutex_.AssertHeld(); + //mutex_.AssertHeld(); newq->clear(); for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { @@ -946,8 +968,8 @@ DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) { // Processes input byte c in state, returning new state. 
DFA::State* DFA::RunStateOnByte(State* state, int c) { - if (DEBUG_MODE) - mutex_.AssertHeld(); + //mutex_.AssertHeld(); + if (state <= SpecialStateMax) { if (state == FullMatchState) { // It is convenient for routines like PossibleMatchRange @@ -980,10 +1002,10 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // around this byte. Before the byte we have the flags recorded // in the State structure itself. After the byte we have // nothing yet (but that will change: read on). - uint needflag = state->flag_ >> kFlagNeedShift; - uint beforeflag = state->flag_ & kFlagEmptyMask; - uint oldbeforeflag = beforeflag; - uint afterflag = 0; + uint32_t needflag = state->flag_ >> kFlagNeedShift; + uint32_t beforeflag = state->flag_ & kFlagEmptyMask; + uint32_t oldbeforeflag = beforeflag; + uint32_t afterflag = 0; if (c == '\n') { // Insert implicit $ and ^ around \n @@ -1000,7 +1022,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // byte processed was a word character. Use that info to // insert empty-width (non-)word boundaries. bool islastword = (state->flag_ & kFlagLastWord) != 0; - bool isword = (c != kByteEndText && Prog::IsWordChar(static_cast<uint8>(c))); + bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c)); if (isword == islastword) beforeflag |= kEmptyNonWordBoundary; else @@ -1010,6 +1032,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // Only useful to rerun on empty string if there are new, useful flags. if (beforeflag & ~oldbeforeflag & needflag) { RunWorkqOnEmptyString(q0_, q1_, beforeflag); + using std::swap; swap(q0_, q1_); } bool ismatch = false; @@ -1023,11 +1046,13 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // of the string, but we're at the end of the text so that's okay. // Leaving q0_ alone preseves the match instructions that led to // the current setting of ismatch. 
- if (c != kByteEndText || kind_ != Prog::kManyMatch) + if (c != kByteEndText || kind_ != Prog::kManyMatch) { + using std::swap; swap(q0_, q1_); + } // Save afterflag along with ismatch and isword in new state. - uint flag = afterflag; + uint32_t flag = afterflag; if (ismatch) flag |= kFlagMatch; if (isword) @@ -1082,7 +1107,8 @@ class DFA::RWLocker { Mutex* mu_; bool writing_; - DISALLOW_COPY_AND_ASSIGN(RWLocker); + RWLocker(const RWLocker&) = delete; + RWLocker& operator=(const RWLocker&) = delete; }; DFA::RWLocker::RWLocker(Mutex* mu) @@ -1096,7 +1122,7 @@ DFA::RWLocker::RWLocker(Mutex* mu) void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { if (!writing_) { mu_->ReaderUnlock(); - mu_->Lock(); + mu_->WriterLock(); writing_ = true; } } @@ -1178,11 +1204,12 @@ class DFA::StateSaver { DFA* dfa_; // the DFA to use int* inst_; // saved info from State int ninst_; - uint flag_; + uint32_t flag_; bool is_special_; // whether original state was special State* special_; // if is_special_, the original state - DISALLOW_COPY_AND_ASSIGN(StateSaver); + StateSaver(const StateSaver&) = delete; + StateSaver& operator=(const StateSaver&) = delete; }; DFA::StateSaver::StateSaver(DFA* dfa, State* state) { @@ -1292,15 +1319,17 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, bool want_earliest_match, bool run_forward) { State* start = params->start; - const uint8* bp = BytePtr(params->text.begin()); // start of text - const uint8* p = bp; // text scanning point - const uint8* ep = BytePtr(params->text.end()); // end of text - const uint8* resetp = NULL; // p at last cache reset - if (!run_forward) + const uint8_t* bp = BytePtr(params->text.begin()); // start of text + const uint8_t* p = bp; // text scanning point + const uint8_t* ep = BytePtr(params->text.end()); // end of text + const uint8_t* resetp = NULL; // p at last cache reset + if (!run_forward) { + using std::swap; swap(p, ep); + } - const uint8* bytemap = prog_->bytemap(); - const uint8* lastmatch 
= NULL; // most recent matching position in text + const uint8_t* bytemap = prog_->bytemap(); + const uint8_t* lastmatch = NULL; // most recent matching position in text bool matched = false; State* s = start; @@ -1315,8 +1344,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, while (p != ep) { if (DebugDFA) - fprintf(stderr, "@%d: %s\n", static_cast<int>(p - bp), - DumpState(s).c_str()); + fprintf(stderr, "@%td: %s\n", + p - bp, DumpState(s).c_str()); if (have_firstbyte && s == start) { // In start state, only way out is to find firstbyte, // so use optimized assembly in memchr to skip ahead. @@ -1373,7 +1402,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, // of 10 bytes per state computation, fail so that RE2 can // fall back to the NFA. if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL && - static_cast<unsigned long>(p - resetp) < 10*state_cache_.size()) { + static_cast<size_t>(p - resetp) < 10*state_cache_.size()) { params->failed = true; return false; } @@ -1421,9 +1450,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, else lastmatch = p + 1; if (DebugDFA) - fprintf(stderr, "match @%d! [%s]\n", - static_cast<int>(lastmatch - bp), - DumpState(s).c_str()); + fprintf(stderr, "match @%td! [%s]\n", + lastmatch - bp, DumpState(s).c_str()); if (want_earliest_match) { params->ep = reinterpret_cast<const char*>(lastmatch); @@ -1476,7 +1504,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, matched = true; lastmatch = p; if (params->matches && kind_ == Prog::kManyMatch) { - vector<int>* v = params->matches; + std::vector<int>* v = params->matches; v->clear(); for (int i = 0; i < s->ninst_; i++) { Prog::Inst* ip = prog_->inst(s->inst_[i]); @@ -1490,8 +1518,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, } } if (DebugDFA) - fprintf(stderr, "match @%d! [%s]\n", static_cast<int>(lastmatch - bp), - DumpState(s).c_str()); + fprintf(stderr, "match @%td! 
[%s]\n", + lastmatch - bp, DumpState(s).c_str()); } params->ep = reinterpret_cast<const char*>(lastmatch); return matched; @@ -1594,7 +1622,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) { // Determine correct search type. int start; - uint flags; + uint32_t flags; if (params->run_forward) { if (text.begin() == context.begin()) { start = kStartBeginText; @@ -1653,7 +1681,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) { // Fills in info if needed. Returns true on success, false on failure. bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, - uint flags) { + uint32_t flags) { // Quick check. int fb = info->firstbyte.load(std::memory_order_acquire); if (fb != kFbUnknown) @@ -1719,7 +1747,7 @@ bool DFA::Search(const StringPiece& text, bool run_forward, bool* failed, const char** epp, - vector<int>* matches) { + std::vector<int>* matches) { *epp = NULL; if (!ok()) { *failed = true; @@ -1730,7 +1758,7 @@ bool DFA::Search(const StringPiece& text, if (DebugDFA) { fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", - text.as_string().c_str(), anchored, want_earliest_match, + text.ToString().c_str(), anchored, want_earliest_match, run_forward, kind_); } @@ -1791,7 +1819,7 @@ DFA* Prog::GetDFA(MatchKind kind) { // For a reverse DFA, all the memory goes to the // "longest match" DFA, because RE2 never does reverse // "first match" searches. - int64 m = dfa_mem_; + int64_t m = dfa_mem_; if (reversed_) { DCHECK_EQ(kind, kLongestMatch); } else if (kind == kFirstMatch || kind == kLongestMatch) { @@ -1823,8 +1851,8 @@ void Prog::DeleteDFA(std::atomic<DFA*>* pdfa) { // This is the only external interface (class DFA only exists in this file). 
// bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, - Anchor anchor, MatchKind kind, - StringPiece* match0, bool* failed, vector<int>* matches) { + Anchor anchor, MatchKind kind, StringPiece* match0, + bool* failed, std::vector<int>* matches) { *failed = false; StringPiece context = const_context; @@ -1879,9 +1907,10 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, // as the beginning. if (match0) { if (reversed_) - match0->set(ep, static_cast<int>(text.end() - ep)); + *match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep)); else - match0->set(text.begin(), static_cast<int>(ep - text.begin())); + *match0 = + StringPiece(text.begin(), static_cast<size_t>(ep - text.begin())); } return true; } @@ -1901,7 +1930,7 @@ int DFA::BuildAllStates() { // Add start state to work queue. StateSet queued; - vector<State*> q; + std::vector<State*> q; queued.insert(params.start); q.push_back(params.start); @@ -1945,7 +1974,7 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { // Also note that previously_visited_states[UnseenStatePtr] will, in the STL // tradition, implicitly insert a '0' value at first use. We take advantage // of that property below. - map<State*, int> previously_visited_states; + std::map<State*, int> previously_visited_states; // Pick out start state for anchored search at beginning of text. RWLocker l(&cache_mutex_); diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index 5dd65d5..8caf992 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -2,9 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+#include "re2/filtered_re2.h" + +#include <stddef.h> #include <string> + #include "util/util.h" -#include "re2/filtered_re2.h" +#include "util/logging.h" #include "re2/prefilter.h" #include "re2/prefilter_tree.h" @@ -40,7 +44,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, return code; } -void FilteredRE2::Compile(vector<string>* atoms) { +void FilteredRE2::Compile(std::vector<string>* atoms) { if (compiled_ || re2_vec_.size() == 0) { LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size(); return; @@ -63,12 +67,12 @@ int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { } int FilteredRE2::FirstMatch(const StringPiece& text, - const vector<int>& atoms) const { + const std::vector<int>& atoms) const { if (!compiled_) { LOG(DFATAL) << "FirstMatch called before Compile"; return -1; } - vector<int> regexps; + std::vector<int> regexps; prefilter_tree_->RegexpsGivenStrings(atoms, &regexps); for (size_t i = 0; i < regexps.size(); i++) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) @@ -78,10 +82,10 @@ int FilteredRE2::FirstMatch(const StringPiece& text, bool FilteredRE2::AllMatches( const StringPiece& text, - const vector<int>& atoms, - vector<int>* matching_regexps) const { + const std::vector<int>& atoms, + std::vector<int>* matching_regexps) const { matching_regexps->clear(); - vector<int> regexps; + std::vector<int> regexps; prefilter_tree_->RegexpsGivenStrings(atoms, &regexps); for (size_t i = 0; i < regexps.size(); i++) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) @@ -90,13 +94,13 @@ bool FilteredRE2::AllMatches( } void FilteredRE2::AllPotentials( - const vector<int>& atoms, - vector<int>* potential_regexps) const { + const std::vector<int>& atoms, + std::vector<int>* potential_regexps) const { prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps); } -void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms, - vector<int>* passed_regexps) { +void FilteredRE2::RegexpsGivenStrings(const std::vector<int>&
matched_atoms, + std::vector<int>* passed_regexps) { prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); } diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h index 1035a12..48a6fd8 100644 --- a/re2/filtered_re2.h +++ b/re2/filtered_re2.h @@ -21,11 +21,12 @@ // indices of strings that were found in the text to get the actual // regexp matches. +#include <string> #include <vector> + #include "re2/re2.h" namespace re2 { -using std::vector; class PrefilterTree; @@ -47,7 +48,7 @@ class FilteredRE2 { // the search text should be lowercased first to find matching // strings from the set of strings returned by Compile. Call after // all Add calls are done. - void Compile(vector<string>* strings_to_match); + void Compile(std::vector<string>* strings_to_match); // Returns the index of the first matching regexp. // Returns -1 on no match. Can be called prior to Compile. @@ -59,21 +60,21 @@ class FilteredRE2 { // Returns -1 on no match. Compile has to be called before // calling this. int FirstMatch(const StringPiece& text, - const vector<int>& atoms) const; + const std::vector<int>& atoms) const; // Returns the indices of all matching regexps, after first clearing // matched_regexps. bool AllMatches(const StringPiece& text, - const vector<int>& atoms, - vector<int>* matching_regexps) const; + const std::vector<int>& atoms, + std::vector<int>* matching_regexps) const; // Returns the indices of all potentially matching regexps after first // clearing potential_regexps. // A regexp is potentially matching if it passes the filter. // If a regexp passes the filter it may still not match. // A regexp that does not pass the filter is guaranteed to not match. - void AllPotentials(const vector<int>& atoms, - vector<int>* potential_regexps) const; + void AllPotentials(const std::vector<int>& atoms, + std::vector<int>* potential_regexps) const; // The number of regexps added. 
int NumRegexps() const { return static_cast<int>(re2_vec_.size()); } @@ -87,11 +88,11 @@ class FilteredRE2 { void PrintPrefilter(int regexpid); // Useful for testing and debugging. - void RegexpsGivenStrings(const vector<int>& matched_atoms, - vector<int>* passed_regexps); + void RegexpsGivenStrings(const std::vector<int>& matched_atoms, + std::vector<int>* passed_regexps); // All the regexps in the FilteredRE2. - vector<RE2*> re2_vec_; + std::vector<RE2*> re2_vec_; // Has the FilteredRE2 been compiled using Compile() bool compiled_; @@ -99,9 +100,8 @@ class FilteredRE2 { // An AND-OR tree of string atoms used for filtering regexps. PrefilterTree* prefilter_tree_; - //DISALLOW_COPY_AND_ASSIGN(FilteredRE2); - FilteredRE2(const FilteredRE2&); - void operator=(const FilteredRE2&); + FilteredRE2(const FilteredRE2&) = delete; + FilteredRE2& operator=(const FilteredRE2&) = delete; }; } // namespace re2 diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index b4e6bb6..849e79d 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -4,7 +4,6 @@ #include <stddef.h> #include <stdint.h> - #include <map> #include <string> @@ -13,7 +12,6 @@ using re2::FLAGS_minloglevel; using re2::StringPiece; -using std::map; using std::string; // NOT static, NOT signed. @@ -25,7 +23,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { return; // Don't waste time fuzzing high-fanout programs. - map<int, int> histogram; + std::map<int, int> histogram; int fanout = re.ProgramFanout(&histogram); if (fanout > 10) return; @@ -53,7 +51,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { // Entry point for libFuzzer. extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - if (size == 0) + if (size == 0 || size > 1000000) return 0; // Suppress logging below FATAL severity. 
@@ -97,8 +95,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { break; int frac = len / i; - pattern.set(ptr, frac); - text.set(ptr + frac, len - frac); + pattern = StringPiece(ptr, frac); + text = StringPiece(ptr + frac, len - frac); Test(pattern, options, text); } diff --git a/re2/make_unicode_groups.py b/re2/make_unicode_groups.py index 8499793..e97d47e 100755 --- a/re2/make_unicode_groups.py +++ b/re2/make_unicode_groups.py @@ -74,7 +74,7 @@ def PrintGroup(name, codes): ugroup = "{ \"%s\", +1" % (name,) # if len(code16) > 0: - # PrintCodes("uint16", name+"_code16", code16) + # PrintCodes("uint16_t", name+"_code16", code16) # ugroup += ", %s_code16, %d" % (name, len(code16)) # else: # ugroup += ", 0, 0" diff --git a/re2/mimics_pcre.cc b/re2/mimics_pcre.cc index 0a55004..ad197be 100644 --- a/re2/mimics_pcre.cc +++ b/re2/mimics_pcre.cc @@ -23,6 +23,7 @@ // Regexp::MimicsPCRE checks for any of these conditions. #include "util/util.h" +#include "util/logging.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -124,7 +125,8 @@ class EmptyStringWalker : public Regexp::Walker<bool> { } private: - DISALLOW_COPY_AND_ASSIGN(EmptyStringWalker); + EmptyStringWalker(const EmptyStringWalker&) = delete; + EmptyStringWalker& operator=(const EmptyStringWalker&) = delete; }; // Called after visiting re's children. child_args contains the return @@ -24,10 +24,19 @@ // Like Thompson's original machine and like the DFA implementation, this // implementation notices a match only once it is one byte past it. 
+#include <stdio.h> +#include <string.h> +#include <algorithm> +#include <string> +#include <utility> +#include <vector> + #include "re2/prog.h" #include "re2/regexp.h" +#include "util/logging.h" #include "util/sparse_array.h" #include "util/sparse_set.h" +#include "util/strutil.h" namespace re2 { @@ -121,7 +130,8 @@ class NFA { Thread* free_threads_; // free list - DISALLOW_COPY_AND_ASSIGN(NFA); + NFA(const NFA&) = delete; + NFA& operator=(const NFA&) = delete; }; NFA::NFA(Prog* prog) { @@ -485,7 +495,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (Debug) { fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", - text.as_string().c_str(), context.as_string().c_str(), anchored, + text.ToString().c_str(), context.ToString().c_str(), anchored, longest); } @@ -553,6 +563,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // This is a no-op the first time around the loop because runq is empty. int id = Step(runq, nextq, p < text.end() ? 
p[0] & 0xFF : -1, flag, p-1); DCHECK_EQ(runq->size(), 0); + using std::swap; swap(nextq, runq); nextq->clear(); if (id != 0) { @@ -631,12 +642,12 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (matched_) { for (int i = 0; i < nsubmatch; i++) - submatch[i].set(match_[2*i], - static_cast<int>(match_[2*i+1] - match_[2*i])); + submatch[i] = + StringPiece(match_[2 * i], + static_cast<size_t>(match_[2 * i + 1] - match_[2 * i])); if (Debug) - fprintf(stderr, "match (%d,%d)\n", - static_cast<int>(match_[0] - btext_), - static_cast<int>(match_[1] - btext_)); + fprintf(stderr, "match (%td,%td)\n", + match_[0] - btext_, match_[1] - btext_); return true; } VLOG(1) << "No matches found"; diff --git a/re2/onepass.cc b/re2/onepass.cc index da90a86..65eb937 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -50,13 +50,26 @@ // See also Anne Brüggemann-Klein and Derick Wood, // "One-unambiguous regular languages", Information and Computation 142(2). +#include <stdint.h> #include <string.h> +#include <algorithm> #include <map> +#include <string> +#include <vector> + #include "util/util.h" +#include "util/logging.h" #include "util/sparse_set.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/prog.h" #include "re2/stringpiece.h" +// Silence "zero-sized array in struct/union" warning for OneState::action. +#ifdef _MSC_VER +#pragma warning(disable: 4200) +#endif + namespace re2 { static const int Debug = 0; @@ -130,11 +143,11 @@ static const int Debug = 0; // maps next input bytes into equivalence classes, to reduce // the memory footprint.) struct OneState { - uint32 matchcond; // conditions to match right now. - uint32 action[]; + uint32_t matchcond; // conditions to match right now. + uint32_t action[]; }; -// The uint32 conditions in the action are a combination of +// The uint32_t conditions in the action are a combination of // condition and capture bits and the next state. 
The bottom 16 bits // are the condition and capture bits, and the top 16 are the index of // the next state. @@ -151,8 +164,8 @@ struct OneState { // and kEmptyNonWordBoundary, so we can use that as a sentinel // instead of needing an extra bit. -static const int kIndexShift = 16; // number of bits below index -static const int kEmptyShift = 6; // number of empty flags in prog.h +static const int kIndexShift = 16; // number of bits below index +static const int kEmptyShift = 6; // number of empty flags in prog.h static const int kRealCapShift = kEmptyShift + 1; static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; @@ -160,23 +173,23 @@ static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; static const int kCapShift = kRealCapShift - 2; static const int kMaxCap = kRealMaxCap + 2; -static const uint32 kMatchWins = 1 << kEmptyShift; -static const uint32 kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift; +static const uint32_t kMatchWins = 1 << kEmptyShift; +static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift; -static const uint32 kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary; +static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary; // Check, at compile time, that prog.h agrees with math above. // This function is never called. void OnePass_Checks() { - COMPILE_ASSERT((1<<kEmptyShift)-1 == kEmptyAllFlags, - kEmptyShift_disagrees_with_kEmptyAllFlags); + static_assert((1<<kEmptyShift)-1 == kEmptyAllFlags, + "kEmptyShift disagrees with kEmptyAllFlags"); // kMaxCap counts pointers, kMaxOnePassCapture counts pairs. 
- COMPILE_ASSERT(kMaxCap == Prog::kMaxOnePassCapture*2, - kMaxCap_disagrees_with_kMaxOnePassCapture); + static_assert(kMaxCap == Prog::kMaxOnePassCapture*2, + "kMaxCap disagrees with kMaxOnePassCapture"); } -static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) { - uint32 satisfied = Prog::EmptyFlags(context, p); +static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) { + uint32_t satisfied = Prog::EmptyFlags(context, p); if (cond & kEmptyAllFlags & ~satisfied) return false; return true; @@ -184,7 +197,7 @@ static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) { // Apply the capture bits in cond, saving p to the appropriate // locations in cap[]. -static void ApplyCaptures(uint32 cond, const char* p, +static void ApplyCaptures(uint32_t cond, const char* p, const char** cap, int ncap) { for (int i = 2; i < ncap; i++) if (cond & (1 << kCapShift << i)) @@ -192,7 +205,7 @@ static void ApplyCaptures(uint32 cond, const char* p, } // Computes the OneState* for the given nodeindex. -static inline OneState* IndexToNode(uint8* nodes, int statesize, +static inline OneState* IndexToNode(uint8_t* nodes, int statesize, int nodeindex) { return reinterpret_cast<OneState*>(nodes + statesize*nodeindex); } @@ -230,27 +243,27 @@ bool Prog::SearchOnePass(const StringPiece& text, if (anchor_end()) kind = kFullMatch; - uint8* nodes = onepass_nodes_; - int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32); + uint8_t* nodes = onepass_nodes_; + int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); // start() is always mapped to the zeroth OneState. 
OneState* state = IndexToNode(nodes, statesize, 0); - uint8* bytemap = bytemap_; + uint8_t* bytemap = bytemap_; const char* bp = text.begin(); const char* ep = text.end(); const char* p; bool matched = false; matchcap[0] = bp; cap[0] = bp; - uint32 nextmatchcond = state->matchcond; + uint32_t nextmatchcond = state->matchcond; for (p = bp; p < ep; p++) { int c = bytemap[*p & 0xFF]; - uint32 matchcond = nextmatchcond; - uint32 cond = state->action[c]; + uint32_t matchcond = nextmatchcond; + uint32_t cond = state->action[c]; // Determine whether we can reach act->next. // If so, advance state and nextmatchcond. if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) { - uint32 nextindex = cond >> kIndexShift; + uint32_t nextindex = cond >> kIndexShift; state = IndexToNode(nodes, statesize, nextindex); nextmatchcond = state->matchcond; } else { @@ -309,7 +322,7 @@ bool Prog::SearchOnePass(const StringPiece& text, // Look for match at end of input. { - uint32 matchcond = state->matchcond; + uint32_t matchcond = state->matchcond; if (matchcond != kImpossible && ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) { if (nmatch > 1 && (matchcond & kCapMask)) @@ -325,8 +338,9 @@ done: if (!matched) return false; for (int i = 0; i < nmatch; i++) - match[i].set(matchcap[2*i], - static_cast<int>(matchcap[2*i+1] - matchcap[2*i])); + match[i] = + StringPiece(matchcap[2 * i], + static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i])); return true; } @@ -348,7 +362,7 @@ static bool AddQ(Instq *q, int id) { struct InstCond { int id; - uint32 cond; + uint32_t cond; }; // Returns whether this is a one-pass program; that is, @@ -379,7 +393,7 @@ bool Prog::IsOnePass() { // Limit max node count to 65000 as a conservative estimate to // avoid overflowing 16-bit node index in encoding. 
int maxnodes = 2 + inst_count(kInstByteRange); - int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32); + int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes) return false; @@ -395,10 +409,10 @@ bool Prog::IsOnePass() { int* nodebyid = new int[size]; // indexed by ip memset(nodebyid, 0xFF, size*sizeof nodebyid[0]); - // Originally, nodes was a uint8[maxnodes*statesize], but that was + // Originally, nodes was a uint8_t[maxnodes*statesize], but that was // unnecessarily optimistic: why allocate a large amount of memory // upfront for a large program when it is unlikely to be one-pass? - vector<uint8> nodes; + std::vector<uint8_t> nodes; Instq tovisit(size), workq(size); AddQ(&tovisit, start()); @@ -423,7 +437,7 @@ bool Prog::IsOnePass() { stack[nstack++].cond = 0; while (nstack > 0) { int id = stack[--nstack].id; - uint32 cond = stack[nstack].cond; + uint32_t cond = stack[nstack].cond; Loop: Prog::Inst* ip = inst(id); @@ -464,8 +478,8 @@ bool Prog::IsOnePass() { // Skip any bytes immediately after c that are also in b. while (c < 256-1 && bytemap_[c+1] == b) c++; - uint32 act = node->action[b]; - uint32 newact = (nextindex << kIndexShift) | cond; + uint32_t act = node->action[b]; + uint32_t newact = (nextindex << kIndexShift) | cond; if (matched) newact |= kMatchWins; if ((act & kImpossible) == kImpossible) { @@ -479,15 +493,15 @@ bool Prog::IsOnePass() { } } if (ip->foldcase()) { - Rune lo = max<Rune>(ip->lo(), 'a') + 'A' - 'a'; - Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a'; + Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a'; + Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a'; for (int c = lo; c <= hi; c++) { int b = bytemap_[c]; // Skip any bytes immediately after c that are also in b. 
while (c < 256-1 && bytemap_[c+1] == b) c++; - uint32 act = node->action[b]; - uint32 newact = (nextindex << kIndexShift) | cond; + uint32_t act = node->action[b]; + uint32_t newact = (nextindex << kIndexShift) | cond; if (matched) newact |= kMatchWins; if ((act & kImpossible) == kImpossible) { @@ -574,7 +588,7 @@ bool Prog::IsOnePass() { LOG(ERROR) << "bytemap:\n" << DumpByteMap(); LOG(ERROR) << "prog:\n" << Dump(); - map<int, int> idmap; + std::map<int, int> idmap; for (int i = 0; i < size; i++) if (nodebyid[i] != -1) idmap[nodebyid[i]] = i; @@ -601,7 +615,7 @@ bool Prog::IsOnePass() { } dfa_mem_ -= nalloc*statesize; - onepass_nodes_ = new uint8[nalloc*statesize]; + onepass_nodes_ = new uint8_t[nalloc*statesize]; memmove(onepass_nodes_, nodes.data(), nalloc*statesize); delete[] stack; diff --git a/re2/parse.cc b/re2/parse.cc index 9cd9cc1..b71b3ea 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -16,7 +16,18 @@ // and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W. // See regexp.h for rationale. +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <map> +#include <string> + #include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/regexp.h" #include "re2/stringpiece.h" #include "re2/unicode_casefold.h" @@ -163,7 +174,8 @@ private: int ncap_; // number of capturing parens seen int rune_max_; // maximum char value for this encoding - DISALLOW_COPY_AND_ASSIGN(ParseState); + ParseState(const ParseState&) = delete; + ParseState& operator=(const ParseState&) = delete; }; // Pseudo-operators - only on parse stack. @@ -346,7 +358,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { // Add in the result of folding the range lo - f->hi // and that range's fold, recursively. 
Rune lo1 = lo; - Rune hi1 = min<Rune>(hi, f->hi); + Rune hi1 = std::min<Rune>(hi, f->hi); switch (f->delta) { default: lo1 += f->delta; @@ -488,7 +500,8 @@ class RepetitionWalker : public Regexp::Walker<int> { virtual int ShortVisit(Regexp* re, int parent_arg); private: - DISALLOW_COPY_AND_ASSIGN(RepetitionWalker); + RepetitionWalker(const RepetitionWalker&) = delete; + RepetitionWalker& operator=(const RepetitionWalker&) = delete; }; int RepetitionWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { @@ -571,7 +584,7 @@ bool Regexp::ParseState::DoLeftParen(const StringPiece& name) { Regexp* re = new Regexp(kLeftParen, flags_); re->cap_ = ++ncap_; if (name.data() != NULL) - re->name_ = new string(name.as_string()); + re->name_ = new string(name.ToString()); return PushRegexp(re); } @@ -1176,7 +1189,7 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { if (r >= 0) { re1->op_ = kRegexpLiteral; re1->rune_ = r; - re1->parse_flags_ = static_cast<uint16>(flags); + re1->parse_flags_ = static_cast<uint16_t>(flags); return true; } @@ -1256,9 +1269,11 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { // Argument order is backwards from usual Google style // but consistent with chartorune. static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { - int n; - if (fullrune(sp->data(), sp->size())) { - n = chartorune(r, sp->data()); + // fullrune() takes int, not size_t. However, it just looks + // at the leading byte and treats any length >= 4 the same. + if (fullrune(sp->data(), static_cast<int>(std::min(static_cast<size_t>(4), + sp->size())))) { + int n = chartorune(r, sp->data()); // Some copies of chartorune have a bug that accepts // encodings of values in (10FFFF, 1FFFFF] as valid. // Those values break the character class algorithm, @@ -1457,7 +1472,7 @@ BadEscape: // Unrecognized escape sequence. 
status->set_code(kRegexpBadEscape); status->set_error_arg( - StringPiece(begin, static_cast<int>(s->data() - begin))); + StringPiece(begin, static_cast<size_t>(s->begin() - begin))); return false; } @@ -1618,25 +1633,25 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, if (c != '{') { // Name is the bit of string we just skipped over for c. const char* p = seq.begin() + 2; - name = StringPiece(p, static_cast<int>(s->begin() - p)); + name = StringPiece(p, static_cast<size_t>(s->begin() - p)); } else { // Name is in braces. Look for closing } size_t end = s->find('}', 0); - if (end == s->npos) { + if (end == StringPiece::npos) { if (!IsValidUTF8(seq, status)) return kParseError; status->set_code(kRegexpBadCharRange); status->set_error_arg(seq); return kParseError; } - name = StringPiece(s->begin(), static_cast<int>(end)); // without '}' - s->remove_prefix(static_cast<int>(end) + 1); // with '}' + name = StringPiece(s->begin(), end); // without '}' + s->remove_prefix(end + 1); // with '}' if (!IsValidUTF8(name, status)) return kParseError; } // Chop seq where s now begins. - seq = StringPiece(seq.begin(), static_cast<int>(s->begin() - seq.begin())); + seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin())); if (name.size() > 0 && name[0] == '^') { sign = -sign; @@ -1704,7 +1719,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, // Got it. Check that it's valid. 
q += 2; - StringPiece name(p, static_cast<int>(q-p)); + StringPiece name(p, static_cast<size_t>(q - p)); const UGroup *g = LookupPosixGroup(name); if (g == NULL) { @@ -1759,7 +1774,7 @@ bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr, if (rr->hi < rr->lo) { status->set_code(kRegexpBadCharRange); status->set_error_arg( - StringPiece(os.data(), static_cast<int>(s->data() - os.data()))); + StringPiece(os.data(), static_cast<size_t>(s->data() - os.data()))); return false; } } else { @@ -1885,7 +1900,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, static bool IsValidCaptureName(const StringPiece& name) { if (name.size() == 0) return false; - for (int i = 0; i < name.size(); i++) { + for (size_t i = 0; i < name.size(); i++) { int c = name[i]; if (('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || @@ -1932,7 +1947,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { if (t.size() > 2 && t[0] == 'P' && t[1] == '<') { // Pull out name. size_t end = t.find('>', 2); - if (end == t.npos) { + if (end == StringPiece::npos) { if (!IsValidUTF8(*s, status_)) return false; status_->set_code(kRegexpBadNamedCapture); @@ -1941,8 +1956,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { } // t is "P<name>...", t[end] == '>' - StringPiece capture(t.begin()-2, static_cast<int>(end)+3); // "(?P<name>" - StringPiece name(t.begin()+2, static_cast<int>(end)-2); // "name" + StringPiece capture(t.begin()-2, end+3); // "(?P<name>" + StringPiece name(t.begin()+2, end-2); // "name" if (!IsValidUTF8(name, status_)) return false; if (!IsValidCaptureName(name)) { @@ -1956,7 +1971,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { return false; } - s->remove_prefix(static_cast<int>(capture.end() - s->begin())); + s->remove_prefix(static_cast<size_t>(capture.end() - s->begin())); return true; } @@ -2040,7 +2055,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { BadPerlOp: status_->set_code(kRegexpBadPerlOp); 
status_->set_error_arg( - StringPiece(s->begin(), static_cast<int>(t.begin() - s->begin()))); + StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin()))); return false; } @@ -2052,7 +2067,7 @@ void ConvertLatin1ToUTF8(const StringPiece& latin1, string* utf) { char buf[UTFmax]; utf->clear(); - for (int i = 0; i < latin1.size(); i++) { + for (size_t i = 0; i < latin1.size(); i++) { Rune r = latin1[i] & 0xFF; int n = runetochar(buf, &r); utf->append(buf, n); @@ -2187,13 +2202,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, // a** is a syntax error, not a double-star. // (and a++ means something else entirely, which we don't support!) status->set_code(kRegexpRepeatOp); - status->set_error_arg( - StringPiece(lastunary.begin(), - static_cast<int>(t.begin() - lastunary.begin()))); + status->set_error_arg(StringPiece( + lastunary.begin(), + static_cast<size_t>(t.begin() - lastunary.begin()))); return NULL; } } - opstr.set(opstr.data(), static_cast<int>(t.data() - opstr.data())); + opstr = StringPiece(opstr.data(), + static_cast<size_t>(t.data() - opstr.data())); if (!ps.PushRepeatOp(op, opstr, nongreedy)) return NULL; isunary = opstr; @@ -2219,13 +2235,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, if (lastunary.size() > 0) { // Not allowed to stack repetition operators. 
status->set_code(kRegexpRepeatOp); - status->set_error_arg( - StringPiece(lastunary.begin(), - static_cast<int>(t.begin() - lastunary.begin()))); + status->set_error_arg(StringPiece( + lastunary.begin(), + static_cast<size_t>(t.begin() - lastunary.begin()))); return NULL; } } - opstr.set(opstr.data(), static_cast<int>(t.data() - opstr.data())); + opstr = StringPiece(opstr.data(), + static_cast<size_t>(t.data() - opstr.data())); if (!ps.PushRepetition(lo, hi, opstr, nongreedy)) return NULL; isunary = opstr; diff --git a/re2/prefilter.cc b/re2/prefilter.cc index f171eec..eaf4a27 100644 --- a/re2/prefilter.cc +++ b/re2/prefilter.cc @@ -2,8 +2,17 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "util/util.h" #include "re2/prefilter.h" + +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/re2.h" #include "re2/unicode_casefold.h" #include "re2/walker-inl.h" @@ -12,15 +21,15 @@ namespace re2 { static const int Trace = false; -typedef set<string>::iterator SSIter; -typedef set<string>::const_iterator ConstSSIter; +typedef std::set<string>::iterator SSIter; +typedef std::set<string>::const_iterator ConstSSIter; // Initializes a Prefilter, allocating subs_ as necessary. Prefilter::Prefilter(Op op) { op_ = op; subs_ = NULL; if (op_ == AND || op_ == OR) - subs_ = new vector<Prefilter*>; + subs_ = new std::vector<Prefilter*>; VLOG(10) << "constructed: " << reinterpret_cast<intptr_t>(this); } @@ -134,7 +143,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { return AndOr(OR, a, b); } -static void SimplifyStringSet(set<string> *ss) { +static void SimplifyStringSet(std::set<string> *ss) { // Now make sure that the strings aren't redundant. 
For example, if // we know "ab" is a required string, then it doesn't help at all to // know that "abc" is also a required string, so delete "abc". This @@ -155,7 +164,7 @@ static void SimplifyStringSet(set<string> *ss) { } } -Prefilter* Prefilter::OrStrings(set<string>* ss) { +Prefilter* Prefilter::OrStrings(std::set<string>* ss) { SimplifyStringSet(ss); Prefilter* or_prefilter = NULL; if (!ss->empty()) { @@ -220,14 +229,14 @@ class Prefilter::Info { // Caller takes ownership of the Prefilter. Prefilter* TakeMatch(); - set<string>& exact() { return exact_; } + std::set<string>& exact() { return exact_; } bool is_exact() const { return is_exact_; } class Walker; private: - set<string> exact_; + std::set<string> exact_; // When is_exact_ is true, the strings that match // are placed in exact_. When it is no longer an exact @@ -266,7 +275,9 @@ string Prefilter::Info::ToString() { if (is_exact_) { int n = 0; string s; - for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) { + for (std::set<string>::iterator i = exact_.begin(); + i != exact_.end(); + ++i) { if (n++ > 0) s += ","; s += *i; @@ -281,16 +292,17 @@ string Prefilter::Info::ToString() { } // Add the strings from src to dst. -static void CopyIn(const set<string>& src, set<string>* dst) { +static void CopyIn(const std::set<string>& src, + std::set<string>* dst) { for (ConstSSIter i = src.begin(); i != src.end(); ++i) dst->insert(*i); } // Add the cross-product of a and b to dst. // (For each string i in a and j in b, add i+j.) 
-static void CrossProduct(const set<string>& a, - const set<string>& b, - set<string>* dst) { +static void CrossProduct(const std::set<string>& a, + const std::set<string>& b, + std::set<string>* dst) { for (ConstSSIter i = a.begin(); i != a.end(); ++i) for (ConstSSIter j = b.begin(); j != b.end(); ++j) dst->insert(*i + *j); @@ -490,7 +502,9 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { bool latin1() { return latin1_; } private: bool latin1_; - DISALLOW_COPY_AND_ASSIGN(Walker); + + Walker(const Walker&) = delete; + Walker& operator=(const Walker&) = delete; }; Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { diff --git a/re2/prefilter.h b/re2/prefilter.h index e58efe8..f400e4f 100644 --- a/re2/prefilter.h +++ b/re2/prefilter.h @@ -9,7 +9,12 @@ // Rather than using Prefilter class directly, use FilteredRE2. // See filtered_re2.h +#include <set> +#include <string> +#include <vector> + #include "util/util.h" +#include "util/logging.h" namespace re2 { @@ -37,14 +42,14 @@ class Prefilter { int unique_id() const { return unique_id_; } // The children of the Prefilter node. - vector<Prefilter*>* subs() { + std::vector<Prefilter*>* subs() { CHECK(op_ == AND || op_ == OR); return subs_; } // Set the children vector. Prefilter takes ownership of subs and // subs_ will be deleted when Prefilter is deleted. - void set_subs(vector<Prefilter*>* subs) { subs_ = subs; } + void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; } // Given a RE2, return a Prefilter. The caller takes ownership of // the Prefilter and should deallocate it. Returns NULL if Prefilter @@ -72,7 +77,7 @@ class Prefilter { static Prefilter* FromString(const string& str); - static Prefilter* OrStrings(set<string>* ss); + static Prefilter* OrStrings(std::set<string>* ss); static Info* BuildInfo(Regexp* re); @@ -82,7 +87,7 @@ class Prefilter { Op op_; // Sub-matches for AND or OR Prefilter. 
- vector<Prefilter*>* subs_; + std::vector<Prefilter*>* subs_; // Actual string to match in leaf node. string atom_; @@ -94,7 +99,8 @@ class Prefilter { // and -1 for duplicate nodes. int unique_id_; - DISALLOW_COPY_AND_ASSIGN(Prefilter); + Prefilter(const Prefilter&) = delete; + Prefilter& operator=(const Prefilter&) = delete; }; } // namespace re2 diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc index be9b584..4238674 100644 --- a/re2/prefilter_tree.cc +++ b/re2/prefilter_tree.cc @@ -2,10 +2,21 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include "re2/prefilter_tree.h" + +#include <stddef.h> +#include <algorithm> +#include <map> +#include <memory> +#include <set> +#include <string> +#include <utility> +#include <vector> + #include "util/util.h" #include "util/flags.h" +#include "util/logging.h" #include "re2/prefilter.h" -#include "re2/prefilter_tree.h" #include "re2/re2.h" DEFINE_int32(filtered_re2_min_atom_len, @@ -47,7 +58,7 @@ static bool KeepPart(Prefilter* prefilter, int level) { case Prefilter::AND: { int j = 0; - vector<Prefilter*>* subs = prefilter->subs(); + std::vector<Prefilter*>* subs = prefilter->subs(); for (size_t i = 0; i < subs->size(); i++) if (KeepPart((*subs)[i], level + 1)) (*subs)[j++] = (*subs)[i]; @@ -79,7 +90,7 @@ void PrefilterTree::Add(Prefilter *f) { prefilter_vec_.push_back(f); } -void PrefilterTree::Compile(vector<string>* atom_vec) { +void PrefilterTree::Compile(std::vector<string>* atom_vec) { if (compiled_) { LOG(DFATAL) << "Compile after Compile."; return; @@ -130,39 +141,33 @@ void PrefilterTree::Compile(vector<string>* atom_vec) { Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) { string node_string = NodeString(node); - map<string, Prefilter*>::iterator iter = node_map_.find(node_string); + std::map<string, Prefilter*>::iterator iter = node_map_.find(node_string); if (iter == node_map_.end()) return NULL; return (*iter).second; } -static string 
Itoa(int n) { - char buf[100]; - snprintf(buf, sizeof buf, "%d", n); - return string(buf); -} - string PrefilterTree::NodeString(Prefilter* node) const { // Adding the operation disambiguates AND/OR/atom nodes. - string s = Itoa(node->op()) + ":"; + string s = std::to_string(node->op()) + ":"; if (node->op() == Prefilter::ATOM) { s += node->atom(); } else { for (size_t i = 0; i < node->subs()->size(); i++) { if (i > 0) s += ','; - s += Itoa((*node->subs())[i]->unique_id()); + s += std::to_string((*node->subs())[i]->unique_id()); } } return s; } -void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { +void PrefilterTree::AssignUniqueIds(std::vector<string>* atom_vec) { atom_vec->clear(); // Build vector of all filter nodes, sorted topologically // from top to bottom in v. - vector<Prefilter*> v; + std::vector<Prefilter*> v; // Add the top level nodes of each regexp prefilter. for (size_t i = 0; i < prefilter_vec_.size(); i++) { @@ -181,7 +186,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { if (f == NULL) continue; if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) { - const vector<Prefilter*>& subs = *f->subs(); + const std::vector<Prefilter*>& subs = *f->subs(); for (size_t j = 0; j < subs.size(); j++) v.push_back(subs[j]); } @@ -246,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { case Prefilter::OR: case Prefilter::AND: { - set<int> uniq_child; + std::set<int> uniq_child; for (size_t j = 0; j < prefilter->subs()->size(); j++) { Prefilter* child = (*prefilter->subs())[j]; Prefilter* canonical = CanonicalNode(child); @@ -285,8 +290,8 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { // Functions for triggering during search. 
void PrefilterTree::RegexpsGivenStrings( - const vector<int>& matched_atoms, - vector<int>* regexps) const { + const std::vector<int>& matched_atoms, + std::vector<int>* regexps) const { regexps->clear(); if (!compiled_) { LOG(WARNING) << "Compile() not called"; @@ -295,7 +300,7 @@ void PrefilterTree::RegexpsGivenStrings( } else { if (!prefilter_vec_.empty()) { IntMap regexps_map(static_cast<int>(prefilter_vec_.size())); - vector<int> matched_atom_ids; + std::vector<int> matched_atom_ids; for (size_t j = 0; j < matched_atoms.size(); j++) { matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]); VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]]; @@ -309,10 +314,10 @@ void PrefilterTree::RegexpsGivenStrings( regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end()); } } - sort(regexps->begin(), regexps->end()); + std::sort(regexps->begin(), regexps->end()); } -void PrefilterTree::PropagateMatch(const vector<int>& atom_ids, +void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, IntMap* regexps) const { IntMap count(static_cast<int>(entries_.size())); IntMap work(static_cast<int>(entries_.size())); @@ -364,14 +369,14 @@ void PrefilterTree::PrintDebugInfo() { for (size_t i = 0; i < entries_.size(); ++i) { StdIntMap* parents = entries_[i].parents; - const vector<int>& regexps = entries_[i].regexps; + const std::vector<int>& regexps = entries_[i].regexps; VLOG(10) << "EntryId: " << i << " N: " << parents->size() << " R: " << regexps.size(); for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) VLOG(10) << it->first; } VLOG(10) << "Map:"; - for (map<string, Prefilter*>::const_iterator iter = node_map_.begin(); + for (std::map<string, Prefilter*>::const_iterator iter = node_map_.begin(); iter != node_map_.end(); ++iter) VLOG(10) << "NodeId: " << (*iter).second->unique_id() << " Str: " << (*iter).first; @@ -390,7 +395,7 @@ string PrefilterTree::DebugNodeString(Prefilter* node) const { for (size_t i = 0; 
i < node->subs()->size(); i++) { if (i > 0) node_string += ','; - node_string += Itoa((*node->subs())[i]->unique_id()); + node_string += std::to_string((*node->subs())[i]->unique_id()); node_string += ":"; node_string += DebugNodeString((*node->subs())[i]); } diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h index a8ec589..276d934 100644 --- a/re2/prefilter_tree.h +++ b/re2/prefilter_tree.h @@ -16,13 +16,17 @@ // atoms) that the user of this class should use to do the string // matching. +#include <map> +#include <string> +#include <vector> + #include "util/util.h" #include "util/sparse_array.h" namespace re2 { typedef SparseArray<int> IntMap; -typedef map<int, int> StdIntMap; +typedef std::map<int, int> StdIntMap; class Prefilter; @@ -42,15 +46,15 @@ class PrefilterTree { // The caller should use the returned set of strings to do string matching. // Each time a string matches, the corresponding index then has to be // and passed to RegexpsGivenStrings below. - void Compile(vector<string>* atom_vec); + void Compile(std::vector<string>* atom_vec); // Given the indices of the atoms that matched, returns the indexes // of regexps that should be searched. The matched_atoms should // contain all the ids of string atoms that were found to match the // content. The caller can use any string match engine to perform // this function. This function is thread safe. - void RegexpsGivenStrings(const vector<int>& matched_atoms, - vector<int>* regexps) const; + void RegexpsGivenStrings(const std::vector<int>& matched_atoms, + std::vector<int>* regexps) const; // Print debug prefilter. Also prints unique ids associated with // nodes of the prefilter of the regexp. @@ -76,17 +80,17 @@ class PrefilterTree { // When this node is ready to trigger the parent, what are the // regexps that are triggered. 
- vector<int> regexps; + std::vector<int> regexps; }; private: // This function assigns unique ids to various parts of the // prefilter, by looking at if these nodes are already in the // PrefilterTree. - void AssignUniqueIds(vector<string>* atom_vec); + void AssignUniqueIds(std::vector<string>* atom_vec); // Given the matching atoms, find the regexps to be triggered. - void PropagateMatch(const vector<int>& atom_ids, + void PropagateMatch(const std::vector<int>& atom_ids, IntMap* regexps) const; // Returns the prefilter node that has the same NodeString as this @@ -105,25 +109,26 @@ class PrefilterTree { // These are all the nodes formed by Compile. Essentially, there is // one node for each unique atom and each unique AND/OR node. - vector<Entry> entries_; + std::vector<Entry> entries_; // Map node string to canonical Prefilter node. - map<string, Prefilter*> node_map_; + std::map<string, Prefilter*> node_map_; // indices of regexps that always pass through the filter (since we // found no required literals in these regexps). - vector<int> unfiltered_; + std::vector<int> unfiltered_; // vector of Prefilter for all regexps. - vector<Prefilter*> prefilter_vec_; + std::vector<Prefilter*> prefilter_vec_; // Atom index in returned strings to entry id mapping. - vector<int> atom_index_to_id_; + std::vector<int> atom_index_to_id_; // Has the prefilter tree been compiled. bool compiled_; - DISALLOW_COPY_AND_ASSIGN(PrefilterTree); + PrefilterTree(const PrefilterTree&) = delete; + PrefilterTree& operator=(const PrefilterTree&) = delete; }; } // namespace diff --git a/re2/prog.cc b/re2/prog.cc index 5d8dd6c..bd83422 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -5,22 +5,31 @@ // Compiled regular expression representation. 
// Tested by compile_test.cc -#include "util/util.h" -#include "util/bitmap.h" #include "re2/prog.h" + +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <memory> +#include <utility> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "re2/bitmap256.h" #include "re2/stringpiece.h" namespace re2 { // Constructors per Inst opcode -void Prog::Inst::InitAlt(uint32 out, uint32 out1) { +void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstAlt); out1_ = out1; } -void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) { +void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstByteRange); lo_ = lo & 0xFF; @@ -28,25 +37,25 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) { foldcase_ = foldcase & 0xFF; } -void Prog::Inst::InitCapture(int cap, uint32 out) { +void Prog::Inst::InitCapture(int cap, uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstCapture); cap_ = cap; } -void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) { +void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstEmptyWidth); empty_ = empty; } -void Prog::Inst::InitMatch(int32 id) { +void Prog::Inst::InitMatch(int32_t id) { DCHECK_EQ(out_opcode_, 0); set_opcode(kInstMatch); match_id_ = id; } -void Prog::Inst::InitNop(uint32 out) { +void Prog::Inst::InitNop(uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_opcode(kInstNop); } @@ -279,7 +288,7 @@ static bool IsMatch(Prog* prog, Prog::Inst* ip) { } } -uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) { +uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { int flags = 0; // ^ and \A @@ -343,18 +352,19 @@ class ByteMapBuilder { void Mark(int lo, int hi); void Merge(); - void Build(uint8* bytemap, int* bytemap_range); + void 
Build(uint8_t* bytemap, int* bytemap_range); private: int Recolor(int oldcolor); Bitmap256 splits_; - vector<int> colors_; + std::vector<int> colors_; int nextcolor_; - vector<pair<int, int>> colormap_; - vector<pair<int, int>> ranges_; + std::vector<std::pair<int, int>> colormap_; + std::vector<std::pair<int, int>> ranges_; - DISALLOW_COPY_AND_ASSIGN(ByteMapBuilder); + ByteMapBuilder(const ByteMapBuilder&) = delete; + ByteMapBuilder& operator=(const ByteMapBuilder&) = delete; }; void ByteMapBuilder::Mark(int lo, int hi) { @@ -373,7 +383,7 @@ void ByteMapBuilder::Mark(int lo, int hi) { } void ByteMapBuilder::Merge() { - for (vector<pair<int, int>>::const_iterator it = ranges_.begin(); + for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin(); it != ranges_.end(); ++it) { int lo = it->first-1; @@ -403,14 +413,14 @@ void ByteMapBuilder::Merge() { ranges_.clear(); } -void ByteMapBuilder::Build(uint8* bytemap, int* bytemap_range) { +void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) { // Assign byte classes numbered from 0. nextcolor_ = 0; int c = 0; while (c < 256) { int next = splits_.FindNextSetBit(c); - uint8 b = static_cast<uint8>(Recolor(colors_[next])); + uint8_t b = static_cast<uint8_t>(Recolor(colors_[next])); while (c <= next) { bytemap[c] = b; c++; @@ -425,9 +435,9 @@ int ByteMapBuilder::Recolor(int oldcolor) { // colors and there will typically be far fewer than that. // Also, we need to consider keys *and* values in order to // avoid recoloring a given range more than once per batch. 
- vector<pair<int, int>>::const_iterator it = + std::vector<std::pair<int, int>>::const_iterator it = std::find_if(colormap_.begin(), colormap_.end(), - [&](const pair<int, int>& kv) -> bool { + [=](const std::pair<int, int>& kv) -> bool { return kv.first == oldcolor || kv.second == oldcolor; }); if (it != colormap_.end()) @@ -487,11 +497,11 @@ void Prog::ComputeByteMap() { int j; for (int i = 0; i < 256; i = j) { for (j = i + 1; j < 256 && - Prog::IsWordChar(static_cast<uint8>(i)) == - Prog::IsWordChar(static_cast<uint8>(j)); + Prog::IsWordChar(static_cast<uint8_t>(i)) == + Prog::IsWordChar(static_cast<uint8_t>(j)); j++) ; - if (Prog::IsWordChar(static_cast<uint8>(i)) == isword) + if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword) builder.Mark(i, j - 1); } builder.Merge(); @@ -505,7 +515,7 @@ void Prog::ComputeByteMap() { if (0) { // For debugging: use trivial bytemap. for (int i = 0; i < 256; i++) - bytemap_[i] = static_cast<uint8>(i); + bytemap_[i] = static_cast<uint8_t>(i); bytemap_range_ = 256; LOG(INFO) << "Using trivial bytemap."; } @@ -519,7 +529,7 @@ void Prog::Flatten() { // Scratch structures. It's important that these are reused by EmitList() // because we call it in a loop and it would thrash the heap otherwise. SparseSet q(size()); - vector<int> stk; + std::vector<int> stk; stk.reserve(size()); // First pass: Marks "roots". @@ -529,8 +539,8 @@ void Prog::Flatten() { // Second pass: Emits "lists". Remaps outs to root-ids. // Builds the mapping from root-ids to flat-ids. 
- vector<int> flatmap(rootmap.size()); - vector<Inst> flat; + std::vector<int> flatmap(rootmap.size()); + std::vector<Inst> flat; flat.reserve(size()); for (SparseArray<int>::const_iterator i = rootmap.begin(); i != rootmap.end(); @@ -576,8 +586,8 @@ void Prog::Flatten() { memmove(inst_, flat.data(), size_ * sizeof *inst_); } -void Prog::MarkRoots(SparseArray<int>* rootmap, - SparseSet* q, vector<int>* stk) { +void Prog::MarkRoots(SparseArray<int>* rootmap, SparseSet* q, + std::vector<int>* stk) { // Mark the kInstFail instruction. rootmap->set_new(0, rootmap->size()); @@ -630,8 +640,9 @@ void Prog::MarkRoots(SparseArray<int>* rootmap, } } -void Prog::EmitList(int root, SparseArray<int>* rootmap, vector<Inst>* flat, - SparseSet* q, vector<int>* stk) { +void Prog::EmitList(int root, SparseArray<int>* rootmap, + std::vector<Inst>* flat, SparseSet* q, + std::vector<int>* stk) { q->clear(); stk->clear(); stk->push_back(root); @@ -662,7 +673,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap, vector<Inst>* flat, flat->emplace_back(); flat->back().set_opcode(kInstAltMatch); flat->back().set_out(static_cast<int>(flat->size())); - flat->back().out1_ = static_cast<uint32>(flat->size())+1; + flat->back().out1_ = static_cast<uint32_t>(flat->size())+1; FALLTHROUGH_INTENDED; case kInstAlt: @@ -9,7 +9,15 @@ // See regexp.h for the Regexp class, which represents a regular // expression symbolically. +#include <stdint.h> +#include <atomic> +#include <mutex> +#include <string> +#include <vector> + #include "util/util.h" +#include "util/logging.h" +#include "util/mutex.h" #include "util/sparse_array.h" #include "util/sparse_set.h" #include "re2/re2.h" @@ -40,10 +48,8 @@ enum EmptyOp { kEmptyAllFlags = (1<<6)-1, }; -class Regexp; - class DFA; -struct OneState; +class Regexp; // Compiled form of regexp program. 
class Prog { @@ -61,12 +67,12 @@ class Prog { Inst& operator=(const Inst&) = default; // Constructors per opcode - void InitAlt(uint32 out, uint32 out1); - void InitByteRange(int lo, int hi, int foldcase, uint32 out); - void InitCapture(int cap, uint32 out); - void InitEmptyWidth(EmptyOp empty, uint32 out); + void InitAlt(uint32_t out, uint32_t out1); + void InitByteRange(int lo, int hi, int foldcase, uint32_t out); + void InitCapture(int cap, uint32_t out); + void InitEmptyWidth(EmptyOp empty, uint32_t out); void InitMatch(int id); - void InitNop(uint32 out); + void InitNop(uint32_t out); void InitFail(); // Getters @@ -121,29 +127,29 @@ class Prog { out_opcode_ = (out<<4) | (last()<<3) | opcode; } - uint32 out_opcode_; // 28 bits of out, 1 bit for last, 3 (low) bits opcode - union { // additional instruction arguments: - uint32 out1_; // opcode == kInstAlt - // alternate next instruction - - int32 cap_; // opcode == kInstCapture - // Index of capture register (holds text - // position recorded by capturing parentheses). - // For \n (the submatch for the nth parentheses), - // the left parenthesis captures into register 2*n - // and the right one captures into register 2*n+1. - - int32 match_id_; // opcode == kInstMatch - // Match ID to identify this match (for re2::Set). - - struct { // opcode == kInstByteRange - uint8 lo_; // byte range is lo_-hi_ inclusive - uint8 hi_; // - uint8 foldcase_; // convert A-Z to a-z before checking range. + uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode + union { // additional instruction arguments: + uint32_t out1_; // opcode == kInstAlt + // alternate next instruction + + int32_t cap_; // opcode == kInstCapture + // Index of capture register (holds text + // position recorded by capturing parentheses). + // For \n (the submatch for the nth parentheses), + // the left parenthesis captures into register 2*n + // and the right one captures into register 2*n+1. 
+ + int32_t match_id_; // opcode == kInstMatch + // Match ID to identify this match (for re2::Set). + + struct { // opcode == kInstByteRange + uint8_t lo_; // byte range is lo_-hi_ inclusive + uint8_t hi_; // + uint8_t foldcase_; // convert A-Z to a-z before checking range. }; - EmptyOp empty_; // opcode == kInstEmptyWidth - // empty_ is bitwise OR of kEmpty* flags above. + EmptyOp empty_; // opcode == kInstEmptyWidth + // empty_ is bitwise OR of kEmpty* flags above. }; friend class Compiler; @@ -186,8 +192,8 @@ class Prog { void set_reversed(bool reversed) { reversed_ = reversed; } int list_count() { return list_count_; } int inst_count(InstOp op) { return inst_count_[op]; } - void set_dfa_mem(int64 dfa_mem) { dfa_mem_ = dfa_mem; } - int64 dfa_mem() { return dfa_mem_; } + void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; } + int64_t dfa_mem() { return dfa_mem_; } int flags() { return flags_; } void set_flags(int flags) { flags_ = flags; } bool anchor_start() { return anchor_start_; } @@ -195,7 +201,7 @@ class Prog { bool anchor_end() { return anchor_end_; } void set_anchor_end(bool b) { anchor_end_ = b; } int bytemap_range() { return bytemap_range_; } - const uint8* bytemap() { return bytemap_; } + const uint8_t* bytemap() { return bytemap_; } // Lazily computed. int first_byte(); @@ -207,7 +213,7 @@ class Prog { // Returns the set of kEmpty flags that are in effect at // position p within context. - static uint32 EmptyFlags(const StringPiece& context, const char* p); + static uint32_t EmptyFlags(const StringPiece& context, const char* p); // Returns whether byte c is a word character: ASCII only. // Used by the implementation of \b and \B. @@ -216,7 +222,7 @@ class Prog { // (the DFA has only one-byte lookahead). // - even if the lookahead were possible, the Progs would be huge. // This crude approximation is the same one PCRE uses. 
- static bool IsWordChar(uint8 c) { + static bool IsWordChar(uint8_t c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || @@ -249,9 +255,8 @@ class Prog { // If matches != NULL and kind == kManyMatch and there is a match, // SearchDFA fills matches with the match IDs of the final matching state. bool SearchDFA(const StringPiece& text, const StringPiece& context, - Anchor anchor, MatchKind kind, - StringPiece* match0, bool* failed, - vector<int>* matches); + Anchor anchor, MatchKind kind, StringPiece* match0, + bool* failed, std::vector<int>* matches); // Build the entire DFA for the given match kind. FOR TESTING ONLY. // Usually the DFA is built out incrementally, as needed, which @@ -327,13 +332,14 @@ class Prog { // Marks the "roots" in the Prog: the outs of kInstByteRange, kInstCapture // and kInstEmptyWidth instructions. - void MarkRoots(SparseArray<int>* rootmap, - SparseSet* q, vector<int>* stk); + void MarkRoots(SparseArray<int>* rootmap, SparseSet* q, + std::vector<int>* stk); // Emits one "list" via "tree" traversal from the given "root" instruction. // The new instructions are appended to the given vector. - void EmitList(int root, SparseArray<int>* rootmap, vector<Inst>* flat, - SparseSet* q, vector<int>* stk); + void EmitList(int root, SparseArray<int>* rootmap, + std::vector<Inst>* flat, SparseSet* q, + std::vector<int>* stk); private: friend class Compiler; @@ -358,18 +364,19 @@ class Prog { int inst_count_[kNumInst]; // count of instructions by opcode Inst* inst_; // pointer to instruction array - uint8* onepass_nodes_; // data for OnePass nodes + uint8_t* onepass_nodes_; // data for OnePass nodes - Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_ - std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch - std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch and kFullMatch - int64 dfa_mem_; // Maximum memory for DFAs. 
+ Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_ + std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch + std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch/kFullMatch + int64_t dfa_mem_; // Maximum memory for DFAs. - uint8 bytemap_[256]; // map from input bytes to byte classes + uint8_t bytemap_[256]; // map from input bytes to byte classes std::once_flag first_byte_once_; - DISALLOW_COPY_AND_ASSIGN(Prog); + Prog(const Prog&) = delete; + Prog& operator=(const Prog&) = delete; }; } // namespace re2 @@ -9,12 +9,25 @@ #include "re2/re2.h" -#include <stdio.h> -#include <string> +#include <assert.h> +#include <ctype.h> #include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <algorithm> +#include <iterator> +#include <mutex> +#include <string> +#include <utility> +#include <vector> + #include "util/util.h" #include "util/flags.h" +#include "util/logging.h" #include "util/sparse_array.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/prog.h" #include "re2/regexp.h" @@ -26,10 +39,10 @@ namespace re2 { static const int kMaxArgs = 16; static const int kVecSize = 1+kMaxArgs; -// This will trigger LNK2005 error in MSVC. -#ifndef _MSC_VER -const int RE2::Options::kDefaultMaxMem; // initialized in re2.h +#ifdef _MSC_VER +__declspec(selectany) #endif +const int RE2::Options::kDefaultMaxMem; // initialized in re2.h RE2::Options::Options(RE2::CannedOptions opt) : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), @@ -50,8 +63,8 @@ RE2::Options::Options(RE2::CannedOptions opt) // static empty objects for use as const references. // To avoid global constructors, allocated in RE2::Init(). static const string* empty_string; -static const map<string, int>* empty_named_groups; -static const map<int, string>* empty_group_names; +static const std::map<string, int>* empty_named_groups; +static const std::map<int, string>* empty_group_names; // Converts from Regexp error code to RE2 error code. 
// Maybe some day they will diverge. In any event, this @@ -92,8 +105,8 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { static string trunc(const StringPiece& pattern) { if (pattern.size() < 100) - return pattern.as_string(); - return pattern.substr(0, 100).as_string() + "..."; + return pattern.ToString(); + return pattern.substr(0, 100).ToString() + "..."; } @@ -161,11 +174,11 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { static std::once_flag empty_once; std::call_once(empty_once, []() { empty_string = new string; - empty_named_groups = new map<string, int>; - empty_group_names = new map<int, string>; + empty_named_groups = new std::map<string, int>; + empty_group_names = new std::map<int, string>; }); - pattern_ = pattern.as_string(); + pattern_ = pattern.ToString(); options_.Copy(options); entire_regexp_ = NULL; suffix_regexp_ = NULL; @@ -189,7 +202,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { } error_ = new string(status.Text()); error_code_ = RegexpErrorToRE2(status.code()); - error_arg_ = status.error_arg().as_string(); + error_arg_ = status.error_arg().ToString(); return; } @@ -256,7 +269,7 @@ int RE2::ProgramSize() const { return prog_->size(); } -int RE2::ProgramFanout(map<int, int>* histogram) const { +int RE2::ProgramFanout(std::map<int, int>* histogram) const { if (prog_ == NULL) return -1; SparseArray<int> fanout(prog_->size()); @@ -284,7 +297,7 @@ int RE2::NumberOfCapturingGroups() const { } // Returns named_groups_, computing it if needed. -const map<string, int>& RE2::NamedCapturingGroups() const { +const std::map<string, int>& RE2::NamedCapturingGroups() const { std::call_once(named_groups_once_, [this]() { if (suffix_regexp_ != NULL) named_groups_ = suffix_regexp_->NamedCaptures(); @@ -295,7 +308,7 @@ const map<string, int>& RE2::NamedCapturingGroups() const { } // Returns group_names_, computing it if needed. 
-const map<int, string>& RE2::CapturingGroupNames() const { +const std::map<int, string>& RE2::CapturingGroupNames() const { std::call_once(group_names_once_, [this]() { if (suffix_regexp_ != NULL) group_names_ = suffix_regexp_->CaptureNames(); @@ -319,7 +332,7 @@ bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, bool RE2::ConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n) { - int consumed; + size_t consumed; if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { input->remove_prefix(consumed); return true; @@ -330,7 +343,7 @@ bool RE2::ConsumeN(StringPiece* input, const RE2& re, bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n) { - int consumed; + size_t consumed; if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) { input->remove_prefix(consumed); return true; @@ -365,7 +378,7 @@ bool RE2::Replace(string *str, int nvec = 1 + MaxSubmatch(rewrite); if (nvec > arraysize(vec)) return false; - if (!re.Match(*str, 0, static_cast<int>(str->size()), UNANCHORED, vec, nvec)) + if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) return false; string s; @@ -392,13 +405,37 @@ int RE2::GlobalReplace(string *str, string out; int count = 0; while (p <= ep) { - if (!re.Match(*str, static_cast<int>(p - str->data()), - static_cast<int>(str->size()), UNANCHORED, vec, nvec)) + if (!re.Match(*str, static_cast<size_t>(p - str->data()), + str->size(), UNANCHORED, vec, nvec)) break; if (p < vec[0].begin()) out.append(p, vec[0].begin() - p); if (vec[0].begin() == lastend && vec[0].size() == 0) { // Disallow empty match at end of last match: skip ahead. + // + // fullrune() takes int, not size_t. However, it just looks + // at the leading byte and treats any length >= 4 the same. 
+ if (re.options().encoding() == RE2::Options::EncodingUTF8 && + fullrune(p, static_cast<int>(std::min(static_cast<ptrdiff_t>(4), + ep - p)))) { + // re is in UTF-8 mode and there is enough left of str + // to allow us to advance by up to UTFmax bytes. + Rune r; + int n = chartorune(&r, p); + // Some copies of chartorune have a bug that accepts + // encodings of values in (10FFFF, 1FFFFF] as valid. + if (r > Runemax) { + n = 1; + r = Runeerror; + } + if (!(n == 1 && r == Runeerror)) { // no decoding error + out.append(p, n); + p += n; + continue; + } + } + // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode, + // we fell through from above and the GIGO principle applies. if (p < ep) out.append(p, 1); p++; @@ -415,6 +452,7 @@ int RE2::GlobalReplace(string *str, if (p < ep) out.append(p, ep - p); + using std::swap; swap(out, *str); return count; } @@ -446,7 +484,7 @@ string RE2::QuoteMeta(const StringPiece& unquoted) { // that. (This also makes it identical to the perl function of the // same name except for the null-character special case; // see `perldoc -f quotemeta`.) - for (int ii = 0; ii < unquoted.length(); ++ii) { + for (size_t ii = 0; ii < unquoted.size(); ++ii) { // Note that using 'isalnum' here raises the benchmark time from // 32ns to 58ns: if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && @@ -518,12 +556,12 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { // Avoid possible locale nonsense in standard strcasecmp. // The string a is known to be all lowercase. 
-static int ascii_strcasecmp(const char* a, const char* b, int len) { +static int ascii_strcasecmp(const char* a, const char* b, size_t len) { const char *ae = a + len; for (; a < ae; a++, b++) { - uint8 x = *a; - uint8 y = *b; + uint8_t x = *a; + uint8_t y = *b; if ('A' <= y && y <= 'Z') y += 'a' - 'A'; if (x != y) @@ -536,8 +574,8 @@ static int ascii_strcasecmp(const char* a, const char* b, int len) { /***** Actual matching and rewriting code *****/ bool RE2::Match(const StringPiece& text, - int startpos, - int endpos, + size_t startpos, + size_t endpos, Anchor re_anchor, StringPiece* submatch, int nsubmatch) const { @@ -547,7 +585,7 @@ bool RE2::Match(const StringPiece& text, return false; } - if (startpos < 0 || startpos > endpos || endpos > text.size()) { + if (startpos > endpos || endpos > text.size()) { if (options_.log_errors()) LOG(ERROR) << "RE2: invalid startpos, endpos pair. [" << "startpos: " << startpos << ", " @@ -585,11 +623,11 @@ bool RE2::Match(const StringPiece& text, re_anchor = ANCHOR_START; // Check for the required prefix, if any. - int prefixlen = 0; + size_t prefixlen = 0; if (!prefix_.empty()) { if (startpos != 0) return false; - prefixlen = static_cast<int>(prefix_.size()); + prefixlen = prefix_.size(); if (prefixlen > subtext.size()) return false; if (prefix_foldcase_) { @@ -621,7 +659,7 @@ bool RE2::Match(const StringPiece& text, const int MaxBitStateProg = 500; // prog_->size() <= Max. const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits) bool can_bit_state = prog_->size() <= MaxBitStateProg; - int bit_state_text_max = MaxBitStateVector / prog_->size(); + size_t bit_state_text_max = MaxBitStateVector / prog_->size(); bool dfa_failed = false; switch (re_anchor) { @@ -786,7 +824,7 @@ bool RE2::Match(const StringPiece& text, // Adjust overall match for required prefix that we stripped off. 
if (prefixlen > 0 && nsubmatch > 0) - submatch[0] = StringPiece(submatch[0].begin() - prefixlen, + submatch[0] = StringPiece(submatch[0].data() - prefixlen, submatch[0].size() + prefixlen); // Zero submatches that don't exist in the regexp. @@ -798,7 +836,7 @@ bool RE2::Match(const StringPiece& text, // Internal matcher - like Match() but takes Args not StringPieces. bool RE2::DoMatch(const StringPiece& text, Anchor anchor, - int* consumed, + size_t* consumed, const Arg* const* args, int n) const { if (!ok()) { @@ -831,7 +869,7 @@ bool RE2::DoMatch(const StringPiece& text, } if (consumed != NULL) - *consumed = static_cast<int>(vec[0].end() - text.begin()); + *consumed = static_cast<size_t>(vec[0].end() - text.begin()); if (n == 0 || args == NULL) { // We are not interested in results @@ -938,38 +976,38 @@ bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { /***** Parsers for various types *****/ -bool RE2::Arg::parse_null(const char* str, int n, void* dest) { +bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) { // We fail if somebody asked us to store into a non-NULL void* pointer return (dest == NULL); } -bool RE2::Arg::parse_string(const char* str, int n, void* dest) { +bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) { if (dest == NULL) return true; reinterpret_cast<string*>(dest)->assign(str, n); return true; } -bool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) { +bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) { if (dest == NULL) return true; - reinterpret_cast<StringPiece*>(dest)->set(str, n); + *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n); return true; } -bool RE2::Arg::parse_char(const char* str, int n, void* dest) { +bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) { if (n != 1) return false; if (dest == NULL) return true; *(reinterpret_cast<char*>(dest)) = str[0]; return true; } -bool RE2::Arg::parse_schar(const char* 
str, int n, void* dest) { +bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) { if (n != 1) return false; if (dest == NULL) return true; *(reinterpret_cast<signed char*>(dest)) = str[0]; return true; } -bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) { +bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) { if (n != 1) return false; if (dest == NULL) return true; *(reinterpret_cast<unsigned char*>(dest)) = str[0]; @@ -982,10 +1020,10 @@ static const int kMaxNumberLength = 32; // REQUIRES "buf" must have length at least nbuf. // Copies "str" into "buf" and null-terminates. // Overwrites *np with the new length. -static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np, - bool accept_spaces) { - int n = *np; - if (n <= 0) return ""; +static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, + size_t* np, bool accept_spaces) { + size_t n = *np; + if (n == 0) return ""; if (n > 0 && isspace(*str)) { // We are less forgiving than the strtoxxx() routines and do not // allow leading spaces. We do allow leading spaces for floats. @@ -1037,9 +1075,9 @@ static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np } bool RE2::Arg::parse_long_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); @@ -1054,16 +1092,16 @@ bool RE2::Arg::parse_long_radix(const char* str, } bool RE2::Arg::parse_ulong_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); if (str[0] == '-') { - // strtoul() will silently accept negative numbers and parse - // them. This module is more strict and treats them as errors. 
- return false; + // strtoul() will silently accept negative numbers and parse + // them. This module is more strict and treats them as errors. + return false; } char* end; @@ -1077,9 +1115,9 @@ bool RE2::Arg::parse_ulong_radix(const char* str, } bool RE2::Arg::parse_short_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { long r; if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse if ((short)r != r) return false; // Out of range @@ -1089,9 +1127,9 @@ bool RE2::Arg::parse_short_radix(const char* str, } bool RE2::Arg::parse_ushort_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { unsigned long r; if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse if ((unsigned short)r != r) return false; // Out of range @@ -1101,9 +1139,9 @@ bool RE2::Arg::parse_ushort_radix(const char* str, } bool RE2::Arg::parse_int_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { long r; if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse if ((int)r != r) return false; // Out of range @@ -1113,9 +1151,9 @@ bool RE2::Arg::parse_int_radix(const char* str, } bool RE2::Arg::parse_uint_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { unsigned long r; if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse if ((unsigned int)r != r) return false; // Out of range @@ -1125,26 +1163,26 @@ bool RE2::Arg::parse_uint_radix(const char* str, } bool RE2::Arg::parse_longlong_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); char* end; errno = 0; - int64 r = strtoll(str, &end, radix); + long long r = strtoll(str, &end, radix); if (end != str + n) return 
false; // Leftover junk if (errno) return false; if (dest == NULL) return true; - *(reinterpret_cast<int64*>(dest)) = r; + *(reinterpret_cast<long long*>(dest)) = r; return true; } bool RE2::Arg::parse_ulonglong_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); @@ -1155,15 +1193,16 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str, } char* end; errno = 0; - uint64 r = strtoull(str, &end, radix); + unsigned long long r = strtoull(str, &end, radix); if (end != str + n) return false; // Leftover junk if (errno) return false; if (dest == NULL) return true; - *(reinterpret_cast<uint64*>(dest)) = r; + *(reinterpret_cast<unsigned long long*>(dest)) = r; return true; } -static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) { +static bool parse_double_float(const char* str, size_t n, bool isfloat, + void* dest) { if (n == 0) return false; static const int kMaxLength = 200; char buf[kMaxLength+1]; @@ -1187,26 +1226,27 @@ static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) return true; } -bool RE2::Arg::parse_double(const char* str, int n, void* dest) { +bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) { return parse_double_float(str, n, false, dest); } -bool RE2::Arg::parse_float(const char* str, int n, void* dest) { +bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) { return parse_double_float(str, n, true, dest); } -#define DEFINE_INTEGER_PARSER(name) \ - bool RE2::Arg::parse_##name(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 10); \ - } \ - bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 16); \ - } \ - bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ - return 
parse_##name##_radix(str, n, dest, 8); \ - } \ - bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 0); \ +#define DEFINE_INTEGER_PARSER(name) \ + bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 10); \ + } \ + bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 16); \ + } \ + bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 8); \ + } \ + bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n, \ + void* dest) { \ + return parse_##name##_radix(str, n, dest, 0); \ } DEFINE_INTEGER_PARSER(short); @@ -179,18 +179,25 @@ // RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)); // will leave 64 in a, b, c, and d. +#include <stddef.h> #include <stdint.h> +#include <sys/types.h> +#include <algorithm> #include <map> #include <mutex> #include <string> + #include "re2/stringpiece.h" namespace re2 { - -using std::string; -using std::map; class Prog; class Regexp; +} // namespace re2 + +namespace re2 { + +// TODO(junyer): Get rid of this. +using std::string; // Interface for regular expression matching. Also corresponds to a // pre-compiled regular expression. An "RE2" object is safe for @@ -276,7 +283,7 @@ class RE2 { // EXPERIMENTAL! SUBJECT TO CHANGE! // Outputs the program fanout as a histogram bucketed by powers of 2. // Returns the number of the largest non-empty bucket. - int ProgramFanout(map<int, int>* histogram) const; + int ProgramFanout(std::map<int, int>* histogram) const; // Returns the underlying Regexp; not for general use. 
// Returns entire_regexp_ so that callers don't need @@ -295,7 +302,7 @@ class RE2 { // type, or one of: // string (matched piece is copied to string) // StringPiece (StringPiece is mutated to point to matched piece) - // T (where "bool T::ParseFrom(const char*, int)" exists) + // T (where "bool T::ParseFrom(const char*, size_t)" exists) // (void*)NULL (the corresponding matched sub-pattern is not copied) // // Returns true iff all of the following conditions are satisfied: @@ -464,12 +471,12 @@ class RE2 { // The map records the index of the leftmost group // with the given name. // Only valid until the re is deleted. - const map<string, int>& NamedCapturingGroups() const; + const std::map<string, int>& NamedCapturingGroups() const; // Return a map from capturing indices to names. // The map has no entries for unnamed groups. // Only valid until the re is deleted. - const map<int, string>& CapturingGroupNames() const; + const std::map<int, string>& CapturingGroupNames() const; // General matching routine. // Match against text starting at offset startpos @@ -492,8 +499,8 @@ class RE2 { // whether submatch i matched the empty string or did not match: // either way, match[i].data() == NULL. 
bool Match(const StringPiece& text, - int startpos, - int endpos, + size_t startpos, + size_t endpos, Anchor anchor, StringPiece *match, int nmatch) const; @@ -707,10 +714,10 @@ class RE2 { void Init(const StringPiece& pattern, const Options& options); bool DoMatch(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const args[], - int n) const; + Anchor anchor, + size_t* consumed, + const Arg* const args[], + int n) const; re2::Prog* ReverseProg() const; @@ -731,10 +738,10 @@ class RE2 { mutable int num_captures_; // Number of capturing groups // Map from capture names to indices - mutable const map<string, int>* named_groups_; + mutable const std::map<string, int>* named_groups_; // Map from capture indices to names - mutable const map<int, string>* group_names_; + mutable const std::map<int, string>* group_names_; // Onces for lazy computations. mutable std::once_flag rprog_once_; @@ -742,9 +749,8 @@ class RE2 { mutable std::once_flag named_groups_once_; mutable std::once_flag group_names_once_; - //DISALLOW_COPY_AND_ASSIGN(RE2); - RE2(const RE2&); - void operator=(const RE2&); + RE2(const RE2&) = delete; + RE2& operator=(const RE2&) = delete; }; /***** Implementation details *****/ @@ -755,7 +761,7 @@ class RE2 { template <class T> class _RE2_MatchObject { public: - static inline bool Parse(const char* str, int n, void* dest) { + static inline bool Parse(const char* str, size_t n, void* dest) { if (dest == NULL) return true; T* object = reinterpret_cast<T*>(dest); return object->ParseFrom(str, n); @@ -770,7 +776,7 @@ class RE2::Arg { // Constructor specially designed for NULL arguments Arg(void*); - typedef bool (*Parser)(const char* str, int n, void* dest); + typedef bool (*Parser)(const char* str, size_t n, void* dest); // Type-specific parsers #define MAKE_PARSER(type, name) \ @@ -803,31 +809,31 @@ class RE2::Arg { : arg_(p), parser_(parser) { } // Parse the data - bool Parse(const char* str, int n) const; + bool Parse(const char* str, 
size_t n) const; private: void* arg_; Parser parser_; - static bool parse_null (const char* str, int n, void* dest); - static bool parse_char (const char* str, int n, void* dest); - static bool parse_schar (const char* str, int n, void* dest); - static bool parse_uchar (const char* str, int n, void* dest); - static bool parse_float (const char* str, int n, void* dest); - static bool parse_double (const char* str, int n, void* dest); - static bool parse_string (const char* str, int n, void* dest); - static bool parse_stringpiece (const char* str, int n, void* dest); - -#define DECLARE_INTEGER_PARSER(name) \ - private: \ - static bool parse_##name(const char* str, int n, void* dest); \ - static bool parse_##name##_radix(const char* str, int n, void* dest, \ - int radix); \ - \ - public: \ - static bool parse_##name##_hex(const char* str, int n, void* dest); \ - static bool parse_##name##_octal(const char* str, int n, void* dest); \ - static bool parse_##name##_cradix(const char* str, int n, void* dest) + static bool parse_null (const char* str, size_t n, void* dest); + static bool parse_char (const char* str, size_t n, void* dest); + static bool parse_schar (const char* str, size_t n, void* dest); + static bool parse_uchar (const char* str, size_t n, void* dest); + static bool parse_float (const char* str, size_t n, void* dest); + static bool parse_double (const char* str, size_t n, void* dest); + static bool parse_string (const char* str, size_t n, void* dest); + static bool parse_stringpiece (const char* str, size_t n, void* dest); + +#define DECLARE_INTEGER_PARSER(name) \ + private: \ + static bool parse_##name(const char* str, size_t n, void* dest); \ + static bool parse_##name##_radix(const char* str, size_t n, void* dest, \ + int radix); \ + \ + public: \ + static bool parse_##name##_hex(const char* str, size_t n, void* dest); \ + static bool parse_##name##_octal(const char* str, size_t n, void* dest); \ + static bool parse_##name##_cradix(const char* str, 
size_t n, void* dest) DECLARE_INTEGER_PARSER(short); DECLARE_INTEGER_PARSER(ushort); @@ -845,7 +851,7 @@ class RE2::Arg { inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { } inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } -inline bool RE2::Arg::Parse(const char* str, int n) const { +inline bool RE2::Arg::Parse(const char* str, size_t n) const { return (*parser_)(str, n, arg_); } diff --git a/re2/regexp.cc b/re2/regexp.cc index 7b1b174..950c985 100644 --- a/re2/regexp.cc +++ b/re2/regexp.cc @@ -5,8 +5,21 @@ // Regular expression representation. // Tested by parse_test.cc -#include "util/util.h" #include "re2/regexp.h" + +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <map> +#include <mutex> +#include <string> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/mutex.h" +#include "util/utf.h" #include "re2/stringpiece.h" #include "re2/walker-inl.h" @@ -14,9 +27,9 @@ namespace re2 { // Constructor. Allocates vectors as appropriate for operator. Regexp::Regexp(RegexpOp op, ParseFlags parse_flags) - : op_(static_cast<uint8>(op)), + : op_(static_cast<uint8_t>(op)), simple_(false), - parse_flags_(static_cast<uint16>(parse_flags)), + parse_flags_(static_cast<uint16_t>(parse_flags)), ref_(1), nsub_(0), down_(NULL) { @@ -62,7 +75,7 @@ bool Regexp::QuickDestroy() { // Lazily allocated. static Mutex* ref_mutex; -static map<Regexp*, int>* ref_map; +static std::map<Regexp*, int>* ref_map; int Regexp::Ref() { if (ref_ < kMaxRef) @@ -78,7 +91,7 @@ Regexp* Regexp::Incref() { static std::once_flag ref_once; std::call_once(ref_once, []() { ref_mutex = new Mutex; - ref_map = new map<Regexp*, int>; + ref_map = new std::map<Regexp*, int>; }); // Store ref count in overflow map. 
@@ -105,7 +118,7 @@ void Regexp::Decref() { MutexLock l(ref_mutex); int r = (*ref_map)[this] - 1; if (r < kMaxRef) { - ref_ = static_cast<uint16>(r); + ref_ = static_cast<uint16_t>(r); ref_map->erase(this); } else { (*ref_map)[this] = r; @@ -410,7 +423,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) { // The stack (vector) has pairs of regexps waiting to // be compared. The regexps are only equal if // all the pairs end up being equal. - vector<Regexp*> stk; + std::vector<Regexp*> stk; for (;;) { // Invariant: TopEqual(a, b) == true. @@ -523,7 +536,9 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> { private: int ncapture_; - DISALLOW_COPY_AND_ASSIGN(NumCapturesWalker); + + NumCapturesWalker(const NumCapturesWalker&) = delete; + NumCapturesWalker& operator=(const NumCapturesWalker&) = delete; }; int Regexp::NumCaptures() { @@ -538,8 +553,8 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { NamedCapturesWalker() : map_(NULL) {} ~NamedCapturesWalker() { delete map_; } - map<string, int>* TakeMap() { - map<string, int>* m = map_; + std::map<string, int>* TakeMap() { + std::map<string, int>* m = map_; map_ = NULL; return m; } @@ -548,7 +563,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { if (re->op() == kRegexpCapture && re->name() != NULL) { // Allocate map once we find a name. if (map_ == NULL) - map_ = new map<string, int>; + map_ = new std::map<string, int>; // Record first occurrence of each name. 
// (The rule is that if you have the same name @@ -566,11 +581,13 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { } private: - map<string, int>* map_; - DISALLOW_COPY_AND_ASSIGN(NamedCapturesWalker); + std::map<string, int>* map_; + + NamedCapturesWalker(const NamedCapturesWalker&) = delete; + NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete; }; -map<string, int>* Regexp::NamedCaptures() { +std::map<string, int>* Regexp::NamedCaptures() { NamedCapturesWalker w; w.Walk(this, 0); return w.TakeMap(); @@ -582,8 +599,8 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { CaptureNamesWalker() : map_(NULL) {} ~CaptureNamesWalker() { delete map_; } - map<int, string>* TakeMap() { - map<int, string>* m = map_; + std::map<int, string>* TakeMap() { + std::map<int, string>* m = map_; map_ = NULL; return m; } @@ -592,7 +609,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { if (re->op() == kRegexpCapture && re->name() != NULL) { // Allocate map once we find a name. if (map_ == NULL) - map_ = new map<int, string>; + map_ = new std::map<int, string>; (*map_)[re->cap()] = *re->name(); } @@ -606,11 +623,13 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { } private: - map<int, string>* map_; - DISALLOW_COPY_AND_ASSIGN(CaptureNamesWalker); + std::map<int, string>* map_; + + CaptureNamesWalker(const CaptureNamesWalker&) = delete; + CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete; }; -map<int, string>* Regexp::CaptureNames() { +std::map<int, string>* Regexp::CaptureNames() { CaptureNamesWalker w; w.Walk(this, 0); return w.TakeMap(); @@ -710,13 +729,13 @@ bool CharClassBuilder::AddRange(Rune lo, Rune hi) { if (lo <= 'z' && hi >= 'A') { // Overlaps some alpha, maybe not all. // Update bitmaps telling which ASCII letters are in the set. 
- Rune lo1 = max<Rune>(lo, 'A'); - Rune hi1 = min<Rune>(hi, 'Z'); + Rune lo1 = std::max<Rune>(lo, 'A'); + Rune hi1 = std::min<Rune>(hi, 'Z'); if (lo1 <= hi1) upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A'); - lo1 = max<Rune>(lo, 'a'); - hi1 = min<Rune>(hi, 'z'); + lo1 = std::max<Rune>(lo, 'a'); + hi1 = std::min<Rune>(hi, 'z'); if (lo1 <= hi1) lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a'); } @@ -832,7 +851,7 @@ void CharClassBuilder::RemoveAbove(Rune r) { void CharClassBuilder::Negate() { // Build up negation and then copy in. // Could edit ranges in place, but C++ won't let me. - vector<RuneRange> v; + std::vector<RuneRange> v; v.reserve(ranges_.size() + 1); // In negation, first range begins at 0, unless @@ -869,7 +888,7 @@ void CharClassBuilder::Negate() { CharClass* CharClass::New(int maxranges) { CharClass* cc; - uint8* data = new uint8[sizeof *cc + maxranges*sizeof cc->ranges_[0]]; + uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]]; cc = reinterpret_cast<CharClass*>(data); cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc); cc->nranges_ = 0; @@ -879,7 +898,7 @@ CharClass* CharClass::New(int maxranges) { } void CharClass::Delete() { - uint8 *data = reinterpret_cast<uint8*>(this); + uint8_t* data = reinterpret_cast<uint8_t*>(this); delete[] data; } diff --git a/re2/regexp.h b/re2/regexp.h index 607dc93..c11c2c1 100644 --- a/re2/regexp.h +++ b/re2/regexp.h @@ -86,7 +86,14 @@ // form accessible to clients, so that client code can analyze the // parsed regular expressions. +#include <stdint.h> +#include <map> +#include <set> +#include <string> + #include "util/util.h" +#include "util/logging.h" +#include "util/utf.h" #include "re2/stringpiece.h" namespace re2 { @@ -208,13 +215,10 @@ class RegexpStatus { StringPiece error_arg_; // Piece of regexp containing syntax error. string* tmp_; // Temporary storage, possibly where error_arg_ is. 
- DISALLOW_COPY_AND_ASSIGN(RegexpStatus); + RegexpStatus(const RegexpStatus&) = delete; + RegexpStatus& operator=(const RegexpStatus&) = delete; }; -// Walkers to implement Simplify. -class CoalesceWalker; -class SimplifyWalker; - // Compiled form; see prog.h class Prog; @@ -262,7 +266,9 @@ class CharClass { int nrunes_; RuneRange *ranges_; int nranges_; - DISALLOW_COPY_AND_ASSIGN(CharClass); + + CharClass(const CharClass&) = delete; + CharClass& operator=(const CharClass&) = delete; }; class Regexp { @@ -371,12 +377,12 @@ class Regexp { // Returns a map from names to capturing group indices, // or NULL if the regexp contains no named capture groups. // The caller is responsible for deleting the map. - map<string, int>* NamedCaptures(); + std::map<string, int>* NamedCaptures(); // Returns a map from capturing group indices to capturing group // names or NULL if the regexp contains no named capture groups. The // caller is responsible for deleting the map. - map<int, string>* CaptureNames(); + std::map<int, string>* CaptureNames(); // Returns a string representation of the current regexp, // using as few parentheses as possible. @@ -412,8 +418,8 @@ class Regexp { // Construction and execution of prog will // stay within approximately max_mem bytes of memory. // If max_mem <= 0, a reasonable default is used. - Prog* CompileToProg(int64 max_mem); - Prog* CompileToReverseProg(int64 max_mem); + Prog* CompileToProg(int64_t max_mem); + Prog* CompileToReverseProg(int64_t max_mem); // Whether to expect this library to find exactly the same answer as PCRE // when running this regexp. Most regexps do mimic PCRE exactly, but a few @@ -443,6 +449,7 @@ class Regexp { // Helpers for Parse. Listed here so they can edit Regexps. class ParseState; + friend class ParseState; friend bool ParseCharClass(StringPiece* s, Regexp** out_re, RegexpStatus* status); @@ -490,7 +497,7 @@ class Regexp { // Allocate space for n sub-regexps. 
void AllocSub(int n) { - if (n < 0 || static_cast<uint16>(n) != n) + if (n < 0 || static_cast<uint16_t>(n) != n) LOG(FATAL) << "Cannot AllocSub " << n; if (n > 1) submany_ = new Regexp*[n]; @@ -504,38 +511,38 @@ class Regexp { void Swap(Regexp *that); // Operator. See description of operators above. - // uint8 instead of RegexpOp to control space usage. - uint8 op_; + // uint8_t instead of RegexpOp to control space usage. + uint8_t op_; // Is this regexp structure already simple // (has it been returned by Simplify)? - // uint8 instead of bool to control space usage. - uint8 simple_; + // uint8_t instead of bool to control space usage. + uint8_t simple_; // Flags saved from parsing and used during execution. // (Only FoldCase is used.) - // uint16 instead of ParseFlags to control space usage. - uint16 parse_flags_; + // uint16_t instead of ParseFlags to control space usage. + uint16_t parse_flags_; // Reference count. Exists so that SimplifyRegexp can build // regexp structures that are dags rather than trees to avoid // exponential blowup in space requirements. - // uint16 to control space usage. + // uint16_t to control space usage. // The standard regexp routines will never generate a - // ref greater than the maximum repeat count (100), + // ref greater than the maximum repeat count (1000), // but even so, Incref and Decref consult an overflow map // when ref_ reaches kMaxRef. - uint16 ref_; - static const uint16 kMaxRef = 0xffff; + uint16_t ref_; + static const uint16_t kMaxRef = 0xffff; // Subexpressions. - // uint16 to control space usage. + // uint16_t to control space usage. // Concat and Alternate handle larger numbers of subexpressions // by building concatenation or alternation trees. // Other routines should call Concat or Alternate instead of // filling in sub() by hand. 
- uint16 nsub_; - static const uint16 kMaxNsub = 0xffff; + uint16_t nsub_; + static const uint16_t kMaxNsub = 0xffff; union { Regexp** submany_; // if nsub_ > 1 Regexp* subone_; // if nsub_ == 1 @@ -570,11 +577,12 @@ class Regexp { void *the_union_[2]; // as big as any other element, for memset }; - DISALLOW_COPY_AND_ASSIGN(Regexp); + Regexp(const Regexp&) = delete; + Regexp& operator=(const Regexp&) = delete; }; // Character class set: contains non-overlapping, non-abutting RuneRanges. -typedef set<RuneRange, RuneRangeLess> RuneRangeSet; +typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet; class CharClassBuilder { public: @@ -599,12 +607,14 @@ class CharClassBuilder { void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags); private: - static const uint32 AlphaMask = (1<<26) - 1; - uint32 upper_; // bitmap of A-Z - uint32 lower_; // bitmap of a-z + static const uint32_t AlphaMask = (1<<26) - 1; + uint32_t upper_; // bitmap of A-Z + uint32_t lower_; // bitmap of a-z int nrunes_; RuneRangeSet ranges_; - DISALLOW_COPY_AND_ASSIGN(CharClassBuilder); + + CharClassBuilder(const CharClassBuilder&) = delete; + CharClassBuilder& operator=(const CharClassBuilder&) = delete; }; // Tell g++ that bitwise ops on ParseFlags produce ParseFlags. 
@@ -4,13 +4,16 @@ #include "re2/set.h" +#include <stddef.h> + #include "util/util.h" +#include "util/logging.h" #include "re2/stringpiece.h" #include "re2/prog.h" #include "re2/re2.h" #include "re2/regexp.h" -using namespace re2; +namespace re2 { RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { options_.Copy(options); @@ -91,7 +94,7 @@ bool RE2::Set::Compile() { return prog_ != NULL; } -bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const { +bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { if (!compiled_) { LOG(DFATAL) << "RE2::Set::Match without Compile"; return false; @@ -112,3 +115,5 @@ bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const { } return true; } + +} // namespace re2 @@ -5,13 +5,17 @@ #ifndef RE2_SET_H_ #define RE2_SET_H_ -#include <utility> +#include <string> #include <vector> #include "re2/re2.h" namespace re2 { -using std::vector; +class Prog; +class Regexp; +} // namespace re2 + +namespace re2 { // An RE2::Set represents a collection of regexps that can // be searched for simultaneously. @@ -37,17 +41,17 @@ class RE2::Set { // Match returns true if text matches any of the regexps in the set. // If so, it fills v (if not NULL) with the indices of the matching regexps. - bool Match(const StringPiece& text, vector<int>* v) const; + bool Match(const StringPiece& text, std::vector<int>* v) const; private: RE2::Options options_; RE2::Anchor anchor_; - vector<re2::Regexp*> re_; + std::vector<re2::Regexp*> re_; re2::Prog* prog_; bool compiled_; - //DISALLOW_COPY_AND_ASSIGN(Set); - Set(const Set&); - void operator=(const Set&); + + Set(const Set&) = delete; + Set& operator=(const Set&) = delete; }; } // namespace re2 diff --git a/re2/simplify.cc b/re2/simplify.cc index ecc60e7..06f0386 100644 --- a/re2/simplify.cc +++ b/re2/simplify.cc @@ -6,7 +6,11 @@ // to use simple extended regular expression features. // Also sort and simplify character classes. 
+#include <string> + #include "util/util.h" +#include "util/logging.h" +#include "util/utf.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -123,7 +127,8 @@ class CoalesceWalker : public Regexp::Walker<Regexp*> { // will be the coalesced op and the remainder of the literal string. static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr); - DISALLOW_COPY_AND_ASSIGN(CoalesceWalker); + CoalesceWalker(const CoalesceWalker&) = delete; + CoalesceWalker& operator=(const CoalesceWalker&) = delete; }; // Walker subclass used by Simplify. @@ -158,7 +163,8 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { // Caller must Decref return value when done with it. static Regexp* SimplifyCharClass(Regexp* re); - DISALLOW_COPY_AND_ASSIGN(SimplifyWalker); + SimplifyWalker(const SimplifyWalker&) = delete; + SimplifyWalker& operator=(const SimplifyWalker&) = delete; }; // Simplifies a regular expression, returning a new regexp. diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc index 00f478a..94c2bcb 100644 --- a/re2/stringpiece.cc +++ b/re2/stringpiece.cc @@ -3,96 +3,64 @@ // license that can be found in the LICENSE file. 
#include "re2/stringpiece.h" -#include "util/util.h" - -using re2::StringPiece; -std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { - o.write(piece.data(), piece.size()); - return o; -} +#include <ostream> -bool StringPiece::_equal(const StringPiece& x, const StringPiece& y) { - int len = x.size(); - if (len != y.size()) { - return false; - } - const char* p = x.data(); - const char* p2 = y.data(); - // Test last byte in case strings share large common prefix - if ((len > 0) && (p[len-1] != p2[len-1])) return false; - const char* p_limit = p + len; - for (; p < p_limit; p++, p2++) { - if (*p != *p2) - return false; - } - return true; -} +#include "util/util.h" -void StringPiece::CopyToString(string* target) const { - target->assign(ptr_, length_); -} +using re2::StringPiece; -void StringPiece::AppendToString(string* target) const { - target->append(ptr_, length_); -} +#ifdef _MSC_VER +__declspec(selectany) +#endif +const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h StringPiece::size_type StringPiece::copy(char* buf, size_type n, size_type pos) const { - size_type ret = min(length_ - pos, n); - memcpy(buf, ptr_ + pos, ret); + size_type ret = std::min(size_ - pos, n); + memcpy(buf, data_ + pos, ret); return ret; } -bool StringPiece::contains(StringPiece s) const { - return find(s, 0) != npos; +StringPiece StringPiece::substr(size_type pos, size_type n) const { + if (pos > size_) pos = size_; + if (n > size_ - pos) n = size_ - pos; + return StringPiece(data_ + pos, n); } StringPiece::size_type StringPiece::find(const StringPiece& s, size_type pos) const { - if (length_ < 0 || pos > static_cast<size_type>(length_)) - return npos; - - const char* result = std::search(ptr_ + pos, ptr_ + length_, - s.ptr_, s.ptr_ + s.length_); - const size_type xpos = result - ptr_; - return xpos + s.length_ <= static_cast<size_type>(length_) ? 
xpos : npos; + if (pos > size_) return npos; + const_pointer result = std::search(data_ + pos, data_ + size_, + s.data_, s.data_ + s.size_); + size_type xpos = result - data_; + return xpos + s.size_ <= size_ ? xpos : npos; } StringPiece::size_type StringPiece::find(char c, size_type pos) const { - if (length_ <= 0 || pos >= static_cast<size_type>(length_)) { - return npos; - } - const char* result = std::find(ptr_ + pos, ptr_ + length_, c); - return result != ptr_ + length_ ? result - ptr_ : npos; + if (size_ <= 0 || pos >= size_) return npos; + const_pointer result = std::find(data_ + pos, data_ + size_, c); + return result != data_ + size_ ? result - data_ : npos; } StringPiece::size_type StringPiece::rfind(const StringPiece& s, size_type pos) const { - if (length_ < s.length_) return npos; - const size_type ulen = length_; - if (s.length_ == 0) return min(ulen, pos); - - const char* last = ptr_ + min(ulen - s.length_, pos) + s.length_; - const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); - return result != last ? result - ptr_ : npos; + if (size_ < s.size_) return npos; + if (s.size_ == 0) return std::min(size_, pos); + const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_; + const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_); + return result != last ? 
result - data_ : npos; } StringPiece::size_type StringPiece::rfind(char c, size_type pos) const { - if (length_ <= 0) return npos; - for (int i = static_cast<int>(min(pos, static_cast<size_type>(length_ - 1))); - i >= 0; --i) { - if (ptr_[i] == c) { - return i; - } + if (size_ <= 0) return npos; + for (size_t i = std::min(pos + 1, size_); i != 0;) { + if (data_[--i] == c) return i; } return npos; } -StringPiece StringPiece::substr(size_type pos, size_type n) const { - if (pos > static_cast<size_type>(length_)) pos = static_cast<size_type>(length_); - if (n > length_ - pos) n = length_ - pos; - return StringPiece(ptr_ + pos, static_cast<int>(n)); +std::ostream& operator<<(std::ostream& o, const StringPiece& p) { + o.write(p.data(), p.size()); + return o; } - -const StringPiece::size_type StringPiece::npos = size_type(-1); diff --git a/re2/stringpiece.h b/re2/stringpiece.h index 0bf5b0c..082d476 100644 --- a/re2/stringpiece.h +++ b/re2/stringpiece.h @@ -19,140 +19,139 @@ // // Arghh! I wish C++ literals were "string". +#include <stddef.h> #include <string.h> #include <algorithm> -#include <cstddef> #include <iosfwd> +#include <iterator> #include <string> namespace re2 { class StringPiece { - private: - const char* ptr_; - int length_; - public: + typedef char value_type; + typedef char* pointer; + typedef const char* const_pointer; + typedef char& reference; + typedef const char& const_reference; + typedef const char* const_iterator; + typedef const_iterator iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + typedef const_reverse_iterator reverse_iterator; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + static const size_type npos = static_cast<size_type>(-1); + // We provide non-explicit singleton constructors so users can pass // in a "const char*" or a "string" wherever a "StringPiece" is // expected. 
- StringPiece() : ptr_(NULL), length_(0) { } - StringPiece(const char* str) - : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { } + StringPiece() + : data_(NULL), size_(0) {} StringPiece(const std::string& str) - : ptr_(str.data()), length_(static_cast<int>(str.size())) { } - StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { } - - // data() may return a pointer to a buffer with embedded NULs, and the - // returned buffer may or may not be null terminated. Therefore it is - // typically a mistake to pass data() to a routine that expects a NUL - // terminated string. - const char* data() const { return ptr_; } - int size() const { return length_; } - int length() const { return length_; } - bool empty() const { return length_ == 0; } - - void clear() { ptr_ = NULL; length_ = 0; } - void set(const char* data, int len) { ptr_ = data; length_ = len; } - void set(const char* str) { - ptr_ = str; - if (str != NULL) - length_ = static_cast<int>(strlen(str)); - else - length_ = 0; + : data_(str.data()), size_(str.size()) {} + StringPiece(const char* str) + : data_(str), size_(str == NULL ? 
0 : strlen(str)) {} + StringPiece(const char* str, size_type len) + : data_(str), size_(len) {} + + const_iterator begin() const { return data_; } + const_iterator end() const { return data_ + size_; } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(data_ + size_); } - void set(const void* data, int len) { - ptr_ = reinterpret_cast<const char*>(data); - length_ = len; + const_reverse_iterator rend() const { + return const_reverse_iterator(data_); } - char operator[](int i) const { return ptr_[i]; } + size_type size() const { return size_; } + size_type length() const { return size_; } + bool empty() const { return size_ == 0; } + + const_reference operator[](size_type i) const { return data_[i]; } + const_pointer data() const { return data_; } - void remove_prefix(int n) { - ptr_ += n; - length_ -= n; + void remove_prefix(size_type n) { + data_ += n; + size_ -= n; } - void remove_suffix(int n) { - length_ -= n; + void remove_suffix(size_type n) { + size_ -= n; } - int compare(const StringPiece& x) const { - int r = memcmp(ptr_, x.ptr_, std::min(length_, x.length_)); - if (r == 0) { - if (length_ < x.length_) r = -1; - else if (length_ > x.length_) r = +1; - } - return r; + void set(const char* str) { + data_ = str; + size_ = str == NULL ? 0 : strlen(str); + } + + void set(const char* str, size_type len) { + data_ = str; + size_ = len; } std::string as_string() const { - return std::string(data(), size()); + return std::string(data_, size_); } + // We also define ToString() here, since many other string-like // interfaces name the routine that converts to a C++ string // "ToString", and it's confusing to have the method that does that // for a StringPiece be called "as_string()". We also leave the // "as_string()" method defined here for existing code. 
std::string ToString() const { - return std::string(data(), size()); + return std::string(data_, size_); } - void CopyToString(std::string* target) const; - void AppendToString(std::string* target) const; - - // Does "this" start with "x" - bool starts_with(const StringPiece& x) const { - return ((length_ >= x.length_) && - (memcmp(ptr_, x.ptr_, x.length_) == 0)); + void CopyToString(std::string* target) const { + target->assign(data_, size_); } - // Does "this" end with "x" - bool ends_with(const StringPiece& x) const { - return ((length_ >= x.length_) && - (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); + void AppendToString(std::string* target) const { + target->append(data_, size_); } - // standard STL container boilerplate - typedef char value_type; - typedef const char* pointer; - typedef const char& reference; - typedef const char& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - static const size_type npos; - typedef const char* const_iterator; - typedef const char* iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - typedef std::reverse_iterator<iterator> reverse_iterator; - iterator begin() const { return ptr_; } - iterator end() const { return ptr_ + length_; } - const_reverse_iterator rbegin() const { - return const_reverse_iterator(ptr_ + length_); + size_type copy(char* buf, size_type n, size_type pos = 0) const; + StringPiece substr(size_type pos = 0, size_type n = npos) const; + + int compare(const StringPiece& x) const { + int r = memcmp(data_, x.data_, std::min(size_, x.size_)); + if (r == 0) { + if (size_ < x.size_) r = -1; + else if (size_ > x.size_) r = +1; + } + return r; } - const_reverse_iterator rend() const { - return const_reverse_iterator(ptr_); + + // Does "this" start with "x"? 
+ bool starts_with(const StringPiece& x) const { + return size_ >= x.size_ && + memcmp(data_, x.data_, x.size_) == 0; } - // STLS says return size_type, but Google says return int - int max_size() const { return length_; } - int capacity() const { return length_; } - size_type copy(char* buf, size_type n, size_type pos = 0) const; + // Does "this" end with "x"? + bool ends_with(const StringPiece& x) const { + return size_ >= x.size_ && + memcmp(data_ + size_ - x.size_, x.data_, x.size_) == 0; + } - bool contains(StringPiece s) const; + bool contains(const StringPiece& s) const { + return find(s) != npos; + } size_type find(const StringPiece& s, size_type pos = 0) const; size_type find(char c, size_type pos = 0) const; size_type rfind(const StringPiece& s, size_type pos = npos) const; size_type rfind(char c, size_type pos = npos) const; - StringPiece substr(size_type pos, size_type n = npos) const; - - static bool _equal(const StringPiece&, const StringPiece&); + private: + const_pointer data_; + size_type size_; }; inline bool operator==(const StringPiece& x, const StringPiece& y) { - return StringPiece::_equal(x, y); + return x.size() == y.size() && + memcmp(x.data(), y.data(), x.size()) == 0; } inline bool operator!=(const StringPiece& x, const StringPiece& y) { @@ -160,8 +159,7 @@ inline bool operator!=(const StringPiece& x, const StringPiece& y) { } inline bool operator<(const StringPiece& x, const StringPiece& y) { - const int r = memcmp(x.data(), y.data(), - std::min(x.size(), y.size())); + int r = memcmp(x.data(), y.data(), std::min(x.size(), y.size())); return ((r < 0) || ((r == 0) && (x.size() < y.size()))); } @@ -179,7 +177,7 @@ inline bool operator>=(const StringPiece& x, const StringPiece& y) { } // namespace re2 -// allow StringPiece to be logged -extern std::ostream& operator<<(std::ostream& o, const re2::StringPiece& piece); +// Allow StringPiece to be logged. 
+std::ostream& operator<<(std::ostream& o, const re2::StringPiece& p); #endif // RE2_STRINGPIECE_H_ diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc index 95c14c2..a16b54f 100644 --- a/re2/testing/backtrack.cc +++ b/re2/testing/backtrack.cc @@ -23,7 +23,12 @@ // not the main library, in order to make it harder to pick up // accidentally. +#include <stddef.h> +#include <stdint.h> +#include <string.h> + #include "util/util.h" +#include "util/logging.h" #include "re2/prog.h" #include "re2/regexp.h" @@ -75,7 +80,7 @@ class Backtracker { // Search state const char* cap_[64]; // capture registers - uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked + uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked size_t nvisited_; // # of words in bitmap }; @@ -127,7 +132,7 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context, // to text, so have to reallocate on each call to Search. delete[] visited_; nvisited_ = (prog_->size()*(text.size()+1) + 31)/32; - visited_ = new uint32[nvisited_]; + visited_ = new uint32_t[nvisited_]; memset(visited_, 0, nvisited_*sizeof visited_[0]); // Anchored search must start at text.begin(). @@ -224,8 +229,8 @@ bool Backtracker::Try(int id, const char* p) { if (submatch_[0].data() == NULL || // First match so far ... (longest_ && p > submatch_[0].end())) { // ... or better match for (int i = 0; i < nsubmatch_; i++) - submatch_[i].set(cap_[2*i], - static_cast<int>(cap_[2*i+1] - cap_[2*i])); + submatch_[i] = StringPiece( + cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i])); } return true; diff --git a/re2/testing/charclass_test.cc b/re2/testing/charclass_test.cc index a3764d4..7e0169c 100644 --- a/re2/testing/charclass_test.cc +++ b/re2/testing/charclass_test.cc @@ -4,7 +4,10 @@ // Test character class manipulations. 
+#include <stdio.h> + #include "util/test.h" +#include "util/utf.h" #include "re2/regexp.h" namespace re2 { diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc index cd8406d..eb723bc 100644 --- a/re2/testing/compile_test.cc +++ b/re2/testing/compile_test.cc @@ -5,13 +5,12 @@ // Test prog.cc, compile.cc #include <string> -#include <vector> + #include "util/test.h" +#include "util/logging.h" #include "re2/regexp.h" #include "re2/prog.h" -DEFINE_string(show, "", "regular expression to compile and dump"); - namespace re2 { // Simple input/output tests checking that diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc index e9c7bef..2051683 100644 --- a/re2/testing/dfa_test.cc +++ b/re2/testing/dfa_test.cc @@ -2,8 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "util/thread.h" +#include <stdint.h> +#include <string> +#include <thread> +#include <vector> + #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" #include "re2/prog.h" #include "re2/re2.h" #include "re2/regexp.h" @@ -22,17 +28,10 @@ namespace re2 { // Check that multithreaded access to DFA class works. -// Helper thread: builds entire DFA for prog. -class BuildThread : public Thread { - public: - BuildThread(Prog* prog) : prog_(prog) {} - virtual void Run() { - CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch)); - } - - private: - Prog* prog_; -}; +// Helper function: builds entire DFA for prog. +static void DoBuild(Prog* prog) { + CHECK(prog->BuildEntireDFA(Prog::kFirstMatch)); +} TEST(Multithreaded, BuildEntireDFA) { // Create regexp with 2^FLAGS_size states in DFA. @@ -40,48 +39,37 @@ TEST(Multithreaded, BuildEntireDFA) { for (int i = 0; i < FLAGS_size; i++) s += "[ab]"; s += "b"; + Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL); + CHECK(re); // Check that single-threaded code works. 
{ - //LOG(INFO) << s; - Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL); - CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); - BuildThread* t = new BuildThread(prog); - t->SetJoinable(true); - t->Start(); - t->Join(); - delete t; + + std::thread t(DoBuild, prog); + t.join(); + delete prog; - re->Decref(); } // Build the DFA simultaneously in a bunch of threads. for (int i = 0; i < FLAGS_repeat; i++) { - Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL); - CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); - vector<BuildThread*> threads; - for (int j = 0; j < FLAGS_threads; j++) { - BuildThread *t = new BuildThread(prog); - t->SetJoinable(true); - threads.push_back(t); - } + std::vector<std::thread> threads; for (int j = 0; j < FLAGS_threads; j++) - threads[j]->Start(); - for (int j = 0; j < FLAGS_threads; j++) { - threads[j]->Join(); - delete threads[j]; - } + threads.emplace_back(DoBuild, prog); + for (int j = 0; j < FLAGS_threads; j++) + threads[j].join(); // One more compile, to make sure everything is okay. prog->BuildEntireDFA(Prog::kFirstMatch); delete prog; - re->Decref(); } + + re->Decref(); } // Check that DFA size requirements are followed. @@ -98,9 +86,9 @@ TEST(SingleThreaded, BuildEntireDFA) { CHECK(re); int max = 24; for (int i = 17; i < max; i++) { - int64 limit = 1<<i; - int64 usage; - //int64 progusage, dfamem; + int64_t limit = 1<<i; + int64_t usage; + //int64_t progusage, dfamem; { testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY); Prog* prog = re->CompileToProg(limit); @@ -139,7 +127,7 @@ static string DeBruijnString(int n) { CHECK_LT(n, static_cast<int>(8*sizeof(int))); CHECK_GT(n, 0); - vector<bool> did(1<<n); + std::vector<bool> did(1<<n); for (int i = 0; i < 1<<n; i++) did[i] = false; @@ -202,8 +190,8 @@ TEST(SingleThreaded, SearchDFA) { // Tell the DFA to trudge along instead. 
FLAGS_re2_dfa_bail_when_slow = false; - int64 usage; - int64 peak_usage; + int64_t usage; + int64_t peak_usage; { testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY); Prog* prog = re->CompileToProg(1<<n); @@ -234,35 +222,25 @@ TEST(SingleThreaded, SearchDFA) { re->Decref(); } -// Helper thread: searches for match, which should match, +// Helper function: searches for match, which should match, // and no_match, which should not. -class SearchThread : public Thread { - public: - SearchThread(Prog* prog, const StringPiece& match, - const StringPiece& no_match) - : prog_(prog), match_(match), no_match_(no_match) {} - - virtual void Run() { - for (int i = 0; i < 2; i++) { - bool matched, failed = false; - matched = prog_->SearchDFA(match_, NULL, - Prog::kUnanchored, Prog::kFirstMatch, - NULL, &failed, NULL); - CHECK(!failed); - CHECK(matched); - matched = prog_->SearchDFA(no_match_, NULL, - Prog::kUnanchored, Prog::kFirstMatch, - NULL, &failed, NULL); - CHECK(!failed); - CHECK(!matched); - } +static void DoSearch(Prog* prog, const StringPiece& match, + const StringPiece& no_match) { + for (int i = 0; i < 2; i++) { + bool matched = false; + bool failed = false; + matched = prog->SearchDFA(match, NULL, + Prog::kUnanchored, Prog::kFirstMatch, + NULL, &failed, NULL); + CHECK(!failed); + CHECK(matched); + matched = prog->SearchDFA(no_match, NULL, + Prog::kUnanchored, Prog::kFirstMatch, + NULL, &failed, NULL); + CHECK(!failed); + CHECK(!matched); } - - private: - Prog* prog_; - StringPiece match_; - StringPiece no_match_; -}; +} TEST(Multithreaded, SearchDFA) { // Same as single-threaded test above. @@ -278,35 +256,28 @@ TEST(Multithreaded, SearchDFA) { { Prog* prog = re->CompileToProg(1<<n); CHECK(prog); - SearchThread* t = new SearchThread(prog, match, no_match); - t->SetJoinable(true); - t->Start(); - t->Join(); - delete t; + + std::thread t(DoSearch, prog, match, no_match); + t.join(); + delete prog; } // Run the search simultaneously in a bunch of threads. 
// Reuse same flags for Multithreaded.BuildDFA above. for (int i = 0; i < FLAGS_repeat; i++) { - //LOG(INFO) << "Search " << i; Prog* prog = re->CompileToProg(1<<n); CHECK(prog); - vector<SearchThread*> threads; - for (int j = 0; j < FLAGS_threads; j++) { - SearchThread *t = new SearchThread(prog, match, no_match); - t->SetJoinable(true); - threads.push_back(t); - } + std::vector<std::thread> threads; for (int j = 0; j < FLAGS_threads; j++) - threads[j]->Start(); - for (int j = 0; j < FLAGS_threads; j++) { - threads[j]->Join(); - delete threads[j]; - } + threads.emplace_back(DoSearch, prog, match, no_match); + for (int j = 0; j < FLAGS_threads; j++) + threads[j].join(); + delete prog; } + re->Decref(); } diff --git a/re2/testing/dump.cc b/re2/testing/dump.cc index 9703039..b60bf24 100644 --- a/re2/testing/dump.cc +++ b/re2/testing/dump.cc @@ -17,8 +17,11 @@ // library (see BUILD). #include <string> -#include <vector> + #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/stringpiece.h" #include "re2/regexp.h" diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc index c06a10c..29c5def 100644 --- a/re2/testing/exhaustive1_test.cc +++ b/re2/testing/exhaustive1_test.cc @@ -4,6 +4,9 @@ // Exhaustive testing of regular expression matching. +#include <string> +#include <vector> + #include "util/test.h" #include "re2/testing/exhaustive_tester.h" @@ -13,7 +16,7 @@ namespace re2 { // Test simple repetition operators TEST(Repetition, Simple) { - vector<string> ops = Split(" ", + std::vector<string> ops = Split(" ", "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} " "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} " "%s* %s+ %s? %s*? %s+? 
%s??"); @@ -25,7 +28,7 @@ TEST(Repetition, Simple) { // Test capturing parens -- (a) -- inside repetition operators TEST(Repetition, Capturing) { - vector<string> ops = Split(" ", + std::vector<string> ops = Split(" ", "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} " "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} " "%s* %s+ %s? %s*? %s+? %s??"); @@ -33,7 +36,7 @@ TEST(Repetition, Capturing) { 7, Explode("ab"), "(?:%s)", ""); // This would be a great test, but it runs forever when PCRE is enabled. - if (!StringPiece(FLAGS_regexp_engines).contains("PCRE")) + if (FLAGS_regexp_engines.find("PCRE") == string::npos) ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops, 50, Explode("a"), "(?:%s)", ""); } diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc index 6dc5016..ba38a6e 100644 --- a/re2/testing/exhaustive2_test.cc +++ b/re2/testing/exhaustive2_test.cc @@ -4,12 +4,15 @@ // Exhaustive testing of regular expression matching. +#include <stddef.h> +#include <memory> +#include <string> +#include <vector> + #include "util/test.h" #include "re2/re2.h" #include "re2/testing/exhaustive_tester.h" -DECLARE_string(regexp_engines); - namespace re2 { // Test empty string matches (aka "(?:)") @@ -21,8 +24,8 @@ TEST(EmptyString, Exhaustive) { // Test escaped versions of regexp syntax. TEST(Punctuation, Literals) { - vector<string> alphabet = Explode("()*+?{}[]\\^$."); - vector<string> escaped = alphabet; + std::vector<string> alphabet = Explode("()*+?{}[]\\^$."); + std::vector<string> escaped = alphabet; for (size_t i = 0; i < escaped.size(); i++) escaped[i] = "\\" + escaped[i]; ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(), @@ -60,7 +63,7 @@ TEST(LineEnds, Exhaustive) { // provides a mechanism, and RE2 could add new syntax if needed. // // TEST(Newlines, Exhaustive) { -// vector<string> empty_vector; +// std::vector<string> empty_vector; // ExhaustiveTest(1, 1, Split(" ", "\\n . 
a [^a]"), // RegexpGenerator::EgrepOps(), // 4, Explode("a\n"), ""); diff --git a/re2/testing/exhaustive3_test.cc b/re2/testing/exhaustive3_test.cc index 6e46bb4..cf09e18 100644 --- a/re2/testing/exhaustive3_test.cc +++ b/re2/testing/exhaustive3_test.cc @@ -4,14 +4,20 @@ // Exhaustive testing of regular expression matching. +#include <stddef.h> +#include <memory> +#include <string> +#include <vector> + #include "util/test.h" +#include "util/utf.h" #include "re2/testing/exhaustive_tester.h" namespace re2 { // Test simple character classes by themselves. TEST(CharacterClasses, Exhaustive) { - vector<string> atoms = Split(" ", + std::vector<string> atoms = Split(" ", "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), 5, Explode("ab"), "", ""); @@ -19,7 +25,7 @@ TEST(CharacterClasses, Exhaustive) { // Test simple character classes inside a___b (for example, a[a]b). TEST(CharacterClasses, ExhaustiveAB) { - vector<string> atoms = Split(" ", + std::vector<string> atoms = Split(" ", "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), 5, Explode("ab"), "a%sb", ""); @@ -35,9 +41,9 @@ static string UTF8(Rune r) { // Returns a vector of "interesting" UTF8 characters. // Unicode is now too big to just return all of them, // so UTF8Characters return a set likely to be good test cases. -static const vector<string>& InterestingUTF8() { +static const std::vector<string>& InterestingUTF8() { static bool init; - static vector<string> v; + static std::vector<string> v; if (init) return v; @@ -64,12 +70,12 @@ static const vector<string>& InterestingUTF8() { // Test interesting UTF-8 characters against character classes. TEST(InterestingUTF8, SingleOps) { - vector<string> atoms = Split(" ", + std::vector<string> atoms = Split(" ", ". 
^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); - vector<string> ops; // no ops + std::vector<string> ops; // no ops ExhaustiveTest(1, 0, atoms, ops, 1, InterestingUTF8(), "", ""); } @@ -77,13 +83,13 @@ TEST(InterestingUTF8, SingleOps) { // Test interesting UTF-8 characters against character classes, // but wrap everything inside AB. TEST(InterestingUTF8, AB) { - vector<string> atoms = Split(" ", + std::vector<string> atoms = Split(" ", ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); - vector<string> ops; // no ops - vector<string> alpha = InterestingUTF8(); + std::vector<string> ops; // no ops + std::vector<string> alpha = InterestingUTF8(); for (size_t i = 0; i < alpha.size(); i++) alpha[i] = "a" + alpha[i] + "b"; ExhaustiveTest(1, 0, atoms, ops, diff --git a/re2/testing/exhaustive_test.cc b/re2/testing/exhaustive_test.cc index fc40dee..514fd90 100644 --- a/re2/testing/exhaustive_test.cc +++ b/re2/testing/exhaustive_test.cc @@ -9,8 +9,6 @@ namespace re2 { -DECLARE_string(regexp_engines); - // Test very simple expressions. TEST(EgrepLiterals, Lowercase) { EgrepTest(3, 2, "abc.", 3, "abc", ""); diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc index 0e90f33..4f6335f 100644 --- a/re2/testing/exhaustive_tester.cc +++ b/re2/testing/exhaustive_tester.cc @@ -11,34 +11,32 @@ // the NFA, DFA, and a trivial backtracking implementation agree about // the location of the match. 
-#include <stdlib.h> #include <stdio.h> -#ifndef LOGGING -#define LOGGING 0 -#endif - #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" #include "re2/testing/exhaustive_tester.h" #include "re2/testing/tester.h" +// For target `log' in the Makefile. +#ifndef LOGGING +#define LOGGING 0 +#endif + DEFINE_bool(show_regexps, false, "show regexps during testing"); DEFINE_int32(max_bad_regexp_inputs, 1, "Stop testing a regular expression after finding this many " "strings that break it."); -// Compiled in debug mode, the usual tests run for over an hour. -// Have to cut it down to make the unit test machines happy. -DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode."); - namespace re2 { static char* escape(const StringPiece& sp) { static char buf[512]; char* p = buf; *p++ = '\"'; - for (int i = 0; i < sp.size(); i++) { + for (size_t i = 0; i < sp.size(); i++) { if(p+5 >= buf+sizeof buf) LOG(FATAL) << "ExhaustiveTester escape: too long"; if(sp[i] == '\\' || sp[i] == '\"') { @@ -67,10 +65,11 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc if (m[i].begin() == NULL) printf("-"); else - printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin())); + printf("%td-%td", + m[i].begin() - input.begin(), m[i].end() - input.begin()); } } - + // Processes a single generated regexp. // Compiles it using Regexp interface and PCRE, and then // checks that NFA, DFA, and PCRE all return the same results. @@ -143,12 +142,13 @@ void ExhaustiveTester::HandleRegexp(const string& const_regexp) { // Runs an exhaustive test on the given parameters. 
void ExhaustiveTest(int maxatoms, int maxops, - const vector<string>& alphabet, - const vector<string>& ops, - int maxstrlen, const vector<string>& stralphabet, + const std::vector<string>& alphabet, + const std::vector<string>& ops, + int maxstrlen, + const std::vector<string>& stralphabet, const string& wrapper, const string& topwrapper) { - if (RE2_DEBUG_MODE && FLAGS_quick_debug_mode) { + if (RE2_DEBUG_MODE) { if (maxatoms > 1) maxatoms--; if (maxops > 1) diff --git a/re2/testing/exhaustive_tester.h b/re2/testing/exhaustive_tester.h index a8f39eb..7c966cf 100644 --- a/re2/testing/exhaustive_tester.h +++ b/re2/testing/exhaustive_tester.h @@ -5,8 +5,10 @@ #ifndef RE2_TESTING_EXHAUSTIVE_TESTER_H_ #define RE2_TESTING_EXHAUSTIVE_TESTER_H_ +#include <stdint.h> #include <string> #include <vector> + #include "util/util.h" #include "re2/testing/regexp_generator.h" #include "re2/testing/string_generator.h" @@ -35,10 +37,10 @@ class ExhaustiveTester : public RegexpGenerator { public: ExhaustiveTester(int maxatoms, int maxops, - const vector<string>& alphabet, - const vector<string>& ops, + const std::vector<string>& alphabet, + const std::vector<string>& ops, int maxstrlen, - const vector<string>& stralphabet, + const std::vector<string>& stralphabet, const string& wrapper, const string& topwrapper) : RegexpGenerator(maxatoms, maxops, alphabet, ops), @@ -56,7 +58,7 @@ class ExhaustiveTester : public RegexpGenerator { void HandleRegexp(const string& regexp); // Causes testing to generate random input strings. - void RandomStrings(int32 seed, int32 count) { + void RandomStrings(int32_t seed, int32_t count) { randomstrings_ = true; stringseed_ = seed; stringcount_ = count; @@ -71,16 +73,19 @@ class ExhaustiveTester : public RegexpGenerator { int failures_; // Number of tests failed. bool randomstrings_; // Whether to use random strings - int32 stringseed_; // If so, the seed. + int32_t stringseed_; // If so, the seed. int stringcount_; // If so, how many to generate. 
- DISALLOW_COPY_AND_ASSIGN(ExhaustiveTester); + + ExhaustiveTester(const ExhaustiveTester&) = delete; + ExhaustiveTester& operator=(const ExhaustiveTester&) = delete; }; // Runs an exhaustive test on the given parameters. void ExhaustiveTest(int maxatoms, int maxops, - const vector<string>& alphabet, - const vector<string>& ops, - int maxstrlen, const vector<string>& stralphabet, + const std::vector<string>& alphabet, + const std::vector<string>& ops, + int maxstrlen, + const std::vector<string>& stralphabet, const string& wrapper, const string& topwrapper); diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index 76c1284..c3b2a3c 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -2,7 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include <stddef.h> +#include <algorithm> +#include <memory> +#include <string> +#include <vector> + #include "util/test.h" +#include "util/logging.h" #include "re2/filtered_re2.h" #include "re2/re2.h" @@ -11,9 +18,9 @@ DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc namespace re2 { struct FilterTestVars { - vector<string> atoms; - vector<int> atom_indices; - vector<int> matches; + std::vector<string> atoms; + std::vector<int> atom_indices; + std::vector<int> matches; RE2::Options opts; FilteredRE2 f; }; @@ -144,14 +151,14 @@ bool CheckExpectedAtoms(const char* atoms[], int n, const char* testname, struct FilterTestVars* v) { - vector<string> expected; + std::vector<string> expected; for (int i = 0; i < n; i++) expected.push_back(atoms[i]); bool pass = expected.size() == v->atoms.size(); - sort(v->atoms.begin(), v->atoms.end()); - sort(expected.begin(), expected.end()); + std::sort(v->atoms.begin(), v->atoms.end()); + std::sort(expected.begin(), expected.end()); for (int i = 0; pass && i < n; i++) pass = pass && expected[i] == v->atoms[i]; @@ -189,9 +196,9 @@ TEST(FilteredRE2Test, 
AtomTests) { EXPECT_EQ(0, nfail); } -void FindAtomIndices(const vector<string>& atoms, - const vector<string>& matched_atoms, - vector<int>* atom_indices) { +void FindAtomIndices(const std::vector<string>& atoms, + const std::vector<string>& matched_atoms, + std::vector<int>* atom_indices) { atom_indices->clear(); for (size_t i = 0; i < matched_atoms.size(); i++) { for (size_t j = 0; j < atoms.size(); j++) { @@ -218,8 +225,8 @@ TEST(FilteredRE2Test, MatchEmptyPattern) { break; AddRegexpsAndCompile(t->regexps, nregexp, &v); string text = "0123"; - vector<int> atom_ids; - vector<int> matching_regexps; + std::vector<int> atom_ids; + std::vector<int> matching_regexps; EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); } @@ -239,11 +246,11 @@ TEST(FilteredRE2Test, MatchTests) { string text = "abc121212xyz"; // atoms = abc - vector<int> atom_ids; - vector<string> atoms; + std::vector<int> atom_ids; + std::vector<string> atoms; atoms.push_back("abc"); FindAtomIndices(v.atoms, atoms, &atom_ids); - vector<int> matching_regexps; + std::vector<int> matching_regexps; v.f.AllMatches(text, atom_ids, &matching_regexps); EXPECT_EQ(1, matching_regexps.size()); diff --git a/re2/testing/mimics_pcre_test.cc b/re2/testing/mimics_pcre_test.cc index f965092..541e7d2 100644 --- a/re2/testing/mimics_pcre_test.cc +++ b/re2/testing/mimics_pcre_test.cc @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "util/test.h" +#include "util/logging.h" #include "re2/prog.h" #include "re2/regexp.h" diff --git a/re2/testing/null_walker.cc b/re2/testing/null_walker.cc index bc943f4..77fa723 100644 --- a/re2/testing/null_walker.cc +++ b/re2/testing/null_walker.cc @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. 
#include "util/test.h" +#include "util/logging.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -23,7 +24,8 @@ class NullWalker : public Regexp::Walker<bool> { } private: - DISALLOW_COPY_AND_ASSIGN(NullWalker); + NullWalker(const NullWalker&) = delete; + NullWalker& operator=(const NullWalker&) = delete; }; // Called after visiting re's children. child_args contains the return diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc index e204ce1..95cb0e6 100644 --- a/re2/testing/parse_test.cc +++ b/re2/testing/parse_test.cc @@ -5,8 +5,9 @@ // Test parse.cc, dump.cc, and tostring.cc. #include <string> -#include <vector> + #include "util/test.h" +#include "util/logging.h" #include "re2/regexp.h" namespace re2 { diff --git a/re2/testing/possible_match_test.cc b/re2/testing/possible_match_test.cc index 4687165..ca8f5e1 100644 --- a/re2/testing/possible_match_test.cc +++ b/re2/testing/possible_match_test.cc @@ -2,8 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include <string.h> +#include <string> #include <vector> + #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" #include "re2/prog.h" #include "re2/re2.h" #include "re2/regexp.h" @@ -167,10 +172,10 @@ class PossibleMatchTester : public RegexpGenerator { public: PossibleMatchTester(int maxatoms, int maxops, - const vector<string>& alphabet, - const vector<string>& ops, + const std::vector<string>& alphabet, + const std::vector<string>& ops, int maxstrlen, - const vector<string>& stralphabet) + const std::vector<string>& stralphabet) : RegexpGenerator(maxatoms, maxops, alphabet, ops), strgen_(maxstrlen, stralphabet), regexps_(0), tests_(0) { } @@ -187,7 +192,8 @@ class PossibleMatchTester : public RegexpGenerator { int regexps_; // Number of HandleRegexp calls int tests_; // Number of regexp tests. 
- DISALLOW_COPY_AND_ASSIGN(PossibleMatchTester); + PossibleMatchTester(const PossibleMatchTester&) = delete; + PossibleMatchTester& operator=(const PossibleMatchTester&) = delete; }; // Processes a single generated regexp. diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc index d67ae64..bd0842f 100644 --- a/re2/testing/random_test.cc +++ b/re2/testing/random_test.cc @@ -5,6 +5,9 @@ // Random testing of regular expression matching. #include <stdio.h> +#include <string> +#include <vector> + #include "util/test.h" #include "re2/testing/exhaustive_tester.h" @@ -19,9 +22,10 @@ namespace re2 { // (Always uses the same random seeds for reproducibility. // Can give different seeds on command line.) static void RandomTest(int maxatoms, int maxops, - const vector<string>& alphabet, - const vector<string>& ops, - int maxstrlen, const vector<string>& stralphabet, + const std::vector<string>& alphabet, + const std::vector<string>& ops, + int maxstrlen, + const std::vector<string>& stralphabet, const string& wrapper) { // Limit to smaller test cases in debug mode, // because everything is so much slower. @@ -75,7 +79,7 @@ TEST(Random, BigEgrepCaptures) { // character classes like \d. (Adding larger character classes would // make for too many possibilities.) TEST(Random, Complicated) { - vector<string> ops = Split(" ", + std::vector<string> ops = Split(" ", "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? " "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} " "%s{2} %s{2,} %s{3,4} %s{4,5}"); @@ -83,11 +87,11 @@ TEST(Random, Complicated) { // Use (?:\b) and (?:\B) instead of \b and \B, // because PCRE rejects \b* but accepts (?:\b)*. // Ditto ^ and $. - vector<string> atoms = Split(" ", + std::vector<string> atoms = Split(" ", ". 
(?:^) (?:$) \\a \\f \\n \\r \\t \\v " "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) " "a (a) b c - \\\\"); - vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a"); + std::vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a"); RandomTest(10, 10, atoms, ops, 20, alphabet, ""); } diff --git a/re2/testing/re2_arg_test.cc b/re2/testing/re2_arg_test.cc index 06c58f1..7a38de7 100644 --- a/re2/testing/re2_arg_test.cc +++ b/re2/testing/re2_arg_test.cc @@ -7,6 +7,9 @@ // Todo: Expand the test to validate strings parsed to the other types // supported by RE2::Arg class +#include <stdint.h> +#include <string.h> + #include "util/test.h" #include "re2/re2.h" @@ -14,7 +17,7 @@ namespace re2 { struct SuccessTable { const char * value_string; - int64 value; + int64_t value; bool success[6]; }; @@ -25,7 +28,7 @@ struct SuccessTable { // the various integral types and has entries for whether or not each // type can contain the given value. const SuccessTable kSuccessTable[] = { -// string integer value short ushort int uint int64 uint64 +// string integer value i16 u16 i32 u32 i64 u64 // 0 to 2^7-1 { "0", 0, { true, true, true, true, true, true }}, { "127", 127, { true, true, true, true, true, true }}, @@ -56,9 +59,8 @@ const SuccessTable kSuccessTable[] = { // -2^15-1 to -2^31 { "-32769", -32769, { false, false, true, false, true, false }}, -{ "-2147483648", - static_cast<int64>(0xFFFFFFFF80000000LL), -{ false, false, true, false, true, false }}, +{ "-2147483648", static_cast<int64_t>(0xFFFFFFFF80000000LL), + { false, false, true, false, true, false }}, // 2^31 to 2^32-1 { "2147483648", 2147483648U, { false, false, false, true, true, true }}, @@ -71,13 +73,13 @@ const SuccessTable kSuccessTable[] = { // -2^31-1 to -2^63 { "-2147483649", -2147483649LL, { false, false, false, false, true, false }}, -{ "-9223372036854775808", static_cast<int64>(0x8000000000000000LL), +{ "-9223372036854775808", static_cast<int64_t>(0x8000000000000000LL), { false, false, false, 
false, true, false }}, // 2^63 to 2^64-1 -{ "9223372036854775808", static_cast<int64>(9223372036854775808ULL), +{ "9223372036854775808", static_cast<int64_t>(9223372036854775808ULL), { false, false, false, false, false, true }}, -{ "18446744073709551615", static_cast<int64>(18446744073709551615ULL), +{ "18446744073709551615", static_cast<int64_t>(18446744073709551615ULL), { false, false, false, false, false, true }}, // >= 2^64 @@ -95,7 +97,7 @@ const int kNumStrings = arraysize(kSuccessTable); for (int i = 0; i < kNumStrings; ++i) { \ RE2::Arg arg(&r); \ const char* const p = kSuccessTable[i].value_string; \ - bool retval = arg.Parse(p, static_cast<int>(strlen(p))); \ + bool retval = arg.Parse(p, strlen(p)); \ bool success = kSuccessTable[i].success[column]; \ EXPECT_EQ(retval, success) \ << "Parsing '" << p << "' for type " #type " should return " \ @@ -107,27 +109,27 @@ const int kNumStrings = arraysize(kSuccessTable); } TEST(RE2ArgTest, Int16Test) { - PARSE_FOR_TYPE(int16, 0); + PARSE_FOR_TYPE(int16_t, 0); } TEST(RE2ArgTest, Uint16Test) { - PARSE_FOR_TYPE(uint16, 1); + PARSE_FOR_TYPE(uint16_t, 1); } -TEST(RE2ArgTest, IntTest) { - PARSE_FOR_TYPE(int, 2); +TEST(RE2ArgTest, Int32Test) { + PARSE_FOR_TYPE(int32_t, 2); } TEST(RE2ArgTest, Uint32Test) { - PARSE_FOR_TYPE(uint32, 3); + PARSE_FOR_TYPE(uint32_t, 3); } TEST(RE2ArgTest, Int64Test) { - PARSE_FOR_TYPE(int64, 4); + PARSE_FOR_TYPE(int64_t, 4); } TEST(RE2ArgTest, Uint64Test) { - PARSE_FOR_TYPE(uint64, 5); + PARSE_FOR_TYPE(uint64_t, 5); } } // namespace re2 diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc index 830b3f7..d42d597 100644 --- a/re2/testing/re2_test.cc +++ b/re2/testing/re2_test.cc @@ -6,14 +6,20 @@ // TODO: Test extractions for PartialMatch/Consume #include <errno.h> -#if !defined(_MSC_VER) && !defined(__MINGW32__) -#include <unistd.h> /* for sysconf */ +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <map> +#include <string> +#include <utility> +#if 
!defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__) #include <sys/mman.h> +#include <unistd.h> /* for sysconf */ #endif -#include <sys/stat.h> -#include <sys/types.h> -#include <vector> + #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" #include "re2/re2.h" #include "re2/regexp.h" @@ -476,7 +482,7 @@ TEST(ProgramFanout, BigProgram) { RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)"); RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)"); - map<int, int> histogram; + std::map<int, int> histogram; // 3 is the largest non-empty bucket and has 1 element. CHECK_EQ(3, re1.ProgramFanout(&histogram)); @@ -529,14 +535,14 @@ TEST(Capture, NamedGroups) { { RE2 re("(hello world)"); CHECK_EQ(re.NumberOfCapturingGroups(), 1); - const map<string, int>& m = re.NamedCapturingGroups(); + const std::map<string, int>& m = re.NamedCapturingGroups(); CHECK_EQ(m.size(), 0); } { RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); CHECK_EQ(re.NumberOfCapturingGroups(), 6); - const map<string, int>& m = re.NamedCapturingGroups(); + const std::map<string, int>& m = re.NamedCapturingGroups(); CHECK_EQ(m.size(), 4); CHECK_EQ(m.find("A")->second, 1); CHECK_EQ(m.find("B")->second, 2); @@ -558,7 +564,7 @@ TEST(RE2, CapturedGroupTest) { const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", re, matches, num_groups)); - const map<string, int>& named_groups = re.NamedCapturingGroups(); + const std::map<string, int>& named_groups = re.NamedCapturingGroups(); EXPECT_TRUE(named_groups.find("S") != named_groups.end()); EXPECT_TRUE(named_groups.find("D") != named_groups.end()); @@ -766,7 +772,7 @@ TEST(RE2, FullMatchTypeTests) { CHECK_EQ(c, static_cast<unsigned char>('H')); } { - int16 v; + int16_t v; CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); 
CHECK_EQ(v, 32767); @@ -775,16 +781,16 @@ TEST(RE2, FullMatchTypeTests) { CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v)); } { - uint16 v; + uint16_t v; CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767); CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535); CHECK(!RE2::FullMatch("65536", "(\\d+)", &v)); } { - int32 v; - static const int32 max = 0x7fffffff; - static const int32 min = -max - 1; + int32_t v; + static const int32_t max = INT32_C(0x7fffffff); + static const int32_t min = -max - 1; CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max); @@ -803,8 +809,8 @@ TEST(RE2, FullMatchTypeTests) { CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v))); } { - uint32 v; - static const uint32 max = 0xfffffffful; + uint32_t v; + static const uint32_t max = UINT32_C(0xffffffff); CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v)); @@ -813,45 +819,45 @@ TEST(RE2, FullMatchTypeTests) { CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); } { - int64 v; - static const int64 max = 0x7fffffffffffffffull; - static const int64 min = -max - 1; - char buf[32]; + int64_t v; + static const int64_t max = INT64_C(0x7fffffffffffffff); + static const int64_t min = -max - 1; + string str; CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); - snprintf(buf, sizeof(buf), "%lld", (long long int)max); - CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); + str = std::to_string(max); + CHECK(RE2::FullMatch(str, "(-?\\d+)", &v)); CHECK_EQ(v, max); - snprintf(buf, sizeof(buf), "%lld", (long long int)min); - 
CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min); + str = std::to_string(min); + CHECK(RE2::FullMatch(str, "(-?\\d+)", &v)); CHECK_EQ(v, min); - snprintf(buf, sizeof(buf), "%lld", (long long int)max); - assert(buf[strlen(buf)-1] != '9'); - buf[strlen(buf)-1]++; - CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); + str = std::to_string(max); + CHECK_NE(str.back(), '9'); + str.back()++; + CHECK(!RE2::FullMatch(str, "(-?\\d+)", &v)); - snprintf(buf, sizeof(buf), "%lld", (long long int)min); - assert(buf[strlen(buf)-1] != '9'); - buf[strlen(buf)-1]++; - CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); + str = std::to_string(min); + CHECK_NE(str.back(), '9'); + str.back()++; + CHECK(!RE2::FullMatch(str, "(-?\\d+)", &v)); } { - uint64 v; - int64 v2; - static const uint64 max = 0xffffffffffffffffull; - char buf[32]; + uint64_t v; + int64_t v2; + static const uint64_t max = UINT64_C(0xffffffffffffffff); + string str; CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100); - snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); - CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); + str = std::to_string(max); + CHECK(RE2::FullMatch(str, "(-?\\d+)", &v)); CHECK_EQ(v, max); - assert(buf[strlen(buf)-1] != '9'); - buf[strlen(buf)-1]++; - CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); + CHECK_NE(str.back(), '9'); + str.back()++; + CHECK(!RE2::FullMatch(str, "(-?\\d+)", &v)); } } @@ -881,8 +887,8 @@ TEST(RE2, FloatingPointFullMatchTypes) { // short. // // This is known to fail on Cygwin and MinGW due to a broken - // implementation of strtof(3). Sigh. -#if !defined(__CYGWIN__) && !defined(__MINGW32__) + // implementation of strtof(3). And apparently MSVC too. Sigh. 
+#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__) CHECK(RE2::FullMatch("0.1", "(.*)", &v)); CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f); CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v)); @@ -1498,8 +1504,8 @@ TEST(RE2, CapturingGroupNames) { // 12 3 45 6 7 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))"); EXPECT_TRUE(re.ok()); - const map<int, string>& have = re.CapturingGroupNames(); - map<int, string> want; + const std::map<int, string>& have = re.CapturingGroupNames(); + std::map<int, string> want; want[3] = "G2"; want[6] = "G2"; want[7] = "G1"; @@ -1600,11 +1606,28 @@ TEST(RE2, Bug26356109) { string s = "abc"; StringPiece m; - CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::UNANCHORED, &m, 1)); + CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1)); CHECK_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'"; - CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::ANCHOR_BOTH, &m, 1)); + CHECK(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1)); CHECK_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'"; } +TEST(RE2, Issue104) { + // RE2::GlobalReplace always advanced by one byte when the empty string was + // matched, which would clobber any rune that is longer than one byte. + + string s = "bc"; + CHECK_EQ(3, RE2::GlobalReplace(&s, "a*", "d")); + CHECK_EQ("dbdcd", s); + + s = "ąć"; + CHECK_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ")); + CHECK_EQ("ĈąĈćĈ", s); + + s = "人类"; + CHECK_EQ(3, RE2::GlobalReplace(&s, "大*", "小")); + CHECK_EQ("小人小类小", s); +} + } // namespace re2 diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc index 311a60e..8f437b5 100644 --- a/re2/testing/regexp_benchmark.cc +++ b/re2/testing/regexp_benchmark.cc @@ -4,7 +4,15 @@ // Benchmarks for regular expression implementations. 
+#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string> +#include <utility> + #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" #include "re2/prog.h" #include "re2/re2.h" #include "re2/regexp.h" @@ -91,11 +99,8 @@ void MemoryUsage() { fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); } - fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n", - static_cast<int>(sizeof(PCRE)), - static_cast<int>(sizeof(RE2)), - static_cast<int>(sizeof(Prog)), - static_cast<int>(sizeof(Prog::Inst))); + fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n", + sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst)); } // Regular expression implementation wrappers. @@ -156,7 +161,7 @@ void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { BenchmarkMemoryUsage(); StartBenchmarkTiming(); search(iters, regexp, s, Prog::kUnanchored, false); - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes); } // These two are easy because they start with an A, @@ -265,7 +270,7 @@ void SearchBigFixed(int iters, int nbytes, SearchImpl* search) { BenchmarkMemoryUsage(); StartBenchmarkTiming(); search(iters, regexp.c_str(), s, Prog::kUnanchored, true); - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes); } void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCachedDFA); } @@ -295,7 +300,7 @@ void FindAndConsume(int iters, int nbytes) { CHECK(RE2::FindAndConsume(&t, re, &u)); CHECK_EQ(u, "Hello World"); } - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes); } BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); @@ -309,7 +314,7 @@ void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search 
BenchmarkMemoryUsage(); StartBenchmarkTiming(); search(iters, regexp, s, Prog::kAnchored, true); - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes); } // Unambiguous search (RE2 can use OnePass). @@ -383,7 +388,7 @@ void SearchAltMatch(int iters, int nbytes, SearchImpl* search) { BenchmarkMemoryUsage(); StartBenchmarkTiming(); search(iters, "\\C*", s, Prog::kAnchored, true); - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes); } void Search_AltMatch_DFA(int i, int n) { SearchAltMatch(i, n, SearchDFA); } @@ -771,7 +776,7 @@ void SearchPhone(int iters, int nbytes, ParseImpl* search) { BenchmarkMemoryUsage(); StartBenchmarkTiming(); search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s); - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes); } void SearchPhone_CachedPCRE(int i, int n) { @@ -798,7 +803,7 @@ static string DeBruijnString(int n) { CHECK_LT(n, 8*sizeof(int)); CHECK_GT(n, 0); - vector<bool> did(1<<n); + std::vector<bool> did(1<<n); for (int i = 0; i < 1<<n; i++) did[i] = false; @@ -827,11 +832,12 @@ void CacheFill(int iters, int n, SearchImpl *srch) { string t; for (int i = n+1; i < 20; i++) { t = s + s; + using std::swap; swap(s, t); } srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s, Prog::kUnanchored, true); - SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size()); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size()); } void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); } @@ -1492,7 +1498,7 @@ void FullMatchPCRE(int iter, int n, const char *regexp) { StartBenchmarkTiming(); for (int i = 0; i < iter; i++) CHECK(PCRE::FullMatch(s, re)); - SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iter)*n); } void FullMatchRE2(int 
iter, int n, const char *regexp) { @@ -1505,7 +1511,7 @@ void FullMatchRE2(int iter, int n, const char *regexp) { StartBenchmarkTiming(); for (int i = 0; i < iter; i++) CHECK(RE2::FullMatch(s, re)); - SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); + SetBenchmarkBytesProcessed(static_cast<int64_t>(iter)*n); } void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); } diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc index fd085db..c2f3400 100644 --- a/re2/testing/regexp_generator.cc +++ b/re2/testing/regexp_generator.cc @@ -20,17 +20,25 @@ // Then RunPostfix turns each sequence into a regular expression // and passes the regexp to HandleRegexp. +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> #include <string.h> -#include <string> +#include <memory> #include <stack> +#include <string> #include <vector> + #include "util/test.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/testing/regexp_generator.h" namespace re2 { // Returns a vector of the egrep regexp operators. -const vector<string>& RegexpGenerator::EgrepOps() { +const std::vector<string>& RegexpGenerator::EgrepOps() { static const char *ops[] = { "%s%s", "%s|%s", @@ -39,13 +47,13 @@ const vector<string>& RegexpGenerator::EgrepOps() { "%s?", "%s\\C*", }; - static vector<string> v(ops, ops + arraysize(ops)); + static std::vector<string> v(ops, ops + arraysize(ops)); return v; } RegexpGenerator::RegexpGenerator(int maxatoms, int maxops, - const vector<string>& atoms, - const vector<string>& ops) + const std::vector<string>& atoms, + const std::vector<string>& ops) : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) { // Degenerate case. if (atoms_.size() == 0) @@ -57,21 +65,18 @@ RegexpGenerator::RegexpGenerator(int maxatoms, int maxops, // Generates all possible regular expressions (within the parameters), // calling HandleRegexp for each one. 
void RegexpGenerator::Generate() { - vector<string> postfix; + std::vector<string> postfix; GeneratePostfix(&postfix, 0, 0, 0); } // Generates random regular expressions, calling HandleRegexp for each one. -void RegexpGenerator::GenerateRandom(int32 seed, int n) { - ACMRandom acm(seed); - acm_ = &acm; +void RegexpGenerator::GenerateRandom(int32_t seed, int n) { + rng_.seed(seed); for (int i = 0; i < n; i++) { - vector<string> postfix; + std::vector<string> postfix; GenerateRandomPostfix(&postfix, 0, 0, 0); } - - acm_ = NULL; } // Counts and returns the number of occurrences of "%s" in s. @@ -98,7 +103,7 @@ static int CountArgs(const string& s) { // // The initial call should be GeneratePostfix([empty vector], 0, 0, 0). // -void RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk, +void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk, int ops, int atoms) { if (nstk == 1) RunPostfix(*post); @@ -134,11 +139,18 @@ void RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk, // Generates a random postfix command sequence. // Stops and returns true once a single sequence has been generated. -bool RegexpGenerator::GenerateRandomPostfix(vector<string>* post, int nstk, +bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk, int ops, int atoms) { + std::uniform_int_distribution<int> random_stop(0, maxatoms_ - atoms); + std::uniform_int_distribution<int> random_bit(0, 1); + std::uniform_int_distribution<int> random_ops_index( + 0, static_cast<int>(ops_.size()) - 1); + std::uniform_int_distribution<int> random_atoms_index( + 0, static_cast<int>(atoms_.size()) - 1); + for (;;) { // Stop if we get to a single element, but only sometimes. 
- if (nstk == 1 && acm_->Uniform(maxatoms_ + 1 - atoms) == 0) { + if (nstk == 1 && random_stop(rng_) == 0) { RunPostfix(*post); return true; } @@ -150,8 +162,8 @@ bool RegexpGenerator::GenerateRandomPostfix(vector<string>* post, int nstk, return false; // Add operators if there are enough arguments. - if (ops < maxops_ && acm_->Uniform(2) == 0) { - const string& fmt = ops_[acm_->Uniform(static_cast<int32>(ops_.size()))]; + if (ops < maxops_ && random_bit(rng_) == 0) { + const string& fmt = ops_[random_ops_index(rng_)]; int nargs = CountArgs(fmt); if (nargs <= nstk) { post->push_back(fmt); @@ -164,8 +176,8 @@ bool RegexpGenerator::GenerateRandomPostfix(vector<string>* post, int nstk, } // Add atoms if there is room. - if (atoms < maxatoms_ && acm_->Uniform(2) == 0) { - post->push_back(atoms_[acm_->Uniform(static_cast<int32>(atoms_.size()))]); + if (atoms < maxatoms_ && random_bit(rng_) == 0) { + post->push_back(atoms_[random_atoms_index(rng_)]); bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1); post->pop_back(); if (ret) @@ -177,8 +189,8 @@ bool RegexpGenerator::GenerateRandomPostfix(vector<string>* post, int nstk, // Interprets the postfix command sequence to create a regular expression // passed to HandleRegexp. The results of operators like %s|%s are wrapped // in (?: ) to avoid needing to maintain a precedence table. -void RegexpGenerator::RunPostfix(const vector<string>& post) { - stack<string> regexps; +void RegexpGenerator::RunPostfix(const std::vector<string>& post) { + std::stack<string> regexps; for (size_t i = 0; i < post.size(); i++) { switch (CountArgs(post[i])) { default: @@ -226,8 +238,8 @@ void RegexpGenerator::RunPostfix(const vector<string>& post) { } // Split s into an vector of strings, one for each UTF-8 character. 
-vector<string> Explode(const StringPiece& s) { - vector<string> v; +std::vector<string> Explode(const StringPiece& s) { + std::vector<string> v; for (const char *q = s.begin(); q < s.end(); ) { const char* p = q; @@ -241,8 +253,8 @@ vector<string> Explode(const StringPiece& s) { // Split string everywhere a substring is found, returning // vector of pieces. -vector<string> Split(const StringPiece& sep, const StringPiece& s) { - vector<string> v; +std::vector<string> Split(const StringPiece& sep, const StringPiece& s) { + std::vector<string> v; if (sep.size() == 0) return Explode(s); diff --git a/re2/testing/regexp_generator.h b/re2/testing/regexp_generator.h index 06ea4c4..b746399 100644 --- a/re2/testing/regexp_generator.h +++ b/re2/testing/regexp_generator.h @@ -8,9 +8,11 @@ // Regular expression generator: generates all possible // regular expressions within given parameters (see below for details). +#include <stdint.h> +#include <random> #include <string> #include <vector> -#include "util/random.h" + #include "util/util.h" #include "re2/stringpiece.h" @@ -27,43 +29,46 @@ namespace re2 { // class RegexpGenerator { public: - RegexpGenerator(int maxatoms, int maxops, const vector<string>& atoms, - const vector<string>& ops); + RegexpGenerator(int maxatoms, int maxops, const std::vector<string>& atoms, + const std::vector<string>& ops); virtual ~RegexpGenerator() {} // Generates all the regular expressions, calling HandleRegexp(re) for each. void Generate(); // Generates n random regular expressions, calling HandleRegexp(re) for each. - void GenerateRandom(int32 seed, int n); + void GenerateRandom(int32_t seed, int n); // Handles a regular expression. Must be provided by subclass. virtual void HandleRegexp(const string& regexp) = 0; // The egrep regexp operators: * + ? | and concatenation. 
- static const vector<string>& EgrepOps(); + static const std::vector<string>& EgrepOps(); private: - void RunPostfix(const vector<string>& post); - void GeneratePostfix(vector<string>* post, int nstk, int ops, int lits); - bool GenerateRandomPostfix(vector<string>* post, int nstk, int ops, int lits); - - int maxatoms_; // Maximum number of atoms allowed in expr. - int maxops_; // Maximum number of ops allowed in expr. - vector<string> atoms_; // Possible atoms. - vector<string> ops_; // Possible ops. - ACMRandom* acm_; // Random generator. - DISALLOW_COPY_AND_ASSIGN(RegexpGenerator); + void RunPostfix(const std::vector<string>& post); + void GeneratePostfix(std::vector<string>* post, int nstk, int ops, int lits); + bool GenerateRandomPostfix(std::vector<string>* post, int nstk, int ops, + int lits); + + int maxatoms_; // Maximum number of atoms allowed in expr. + int maxops_; // Maximum number of ops allowed in expr. + std::vector<string> atoms_; // Possible atoms. + std::vector<string> ops_; // Possible ops. + std::minstd_rand0 rng_; // Random number generator. + + RegexpGenerator(const RegexpGenerator&) = delete; + RegexpGenerator& operator=(const RegexpGenerator&) = delete; }; // Helpers for preparing arguments to RegexpGenerator constructor. // Returns one string for each character in s. -vector<string> Explode(const StringPiece& s); +std::vector<string> Explode(const StringPiece& s); // Splits string everywhere sep is found, returning // vector of pieces. -vector<string> Split(const StringPiece& sep, const StringPiece& s); +std::vector<string> Split(const StringPiece& sep, const StringPiece& s); } // namespace re2 diff --git a/re2/testing/regexp_test.cc b/re2/testing/regexp_test.cc index 31c76a3..e612eae 100644 --- a/re2/testing/regexp_test.cc +++ b/re2/testing/regexp_test.cc @@ -4,9 +4,13 @@ // Test parse.cc, dump.cc, and tostring.cc. 
+#include <stddef.h> +#include <map> #include <string> #include <vector> + #include "util/test.h" +#include "util/logging.h" #include "re2/regexp.h" namespace re2 { @@ -28,7 +32,7 @@ TEST(Regexp, BigRef) { TEST(Regexp, BigConcat) { Regexp* x; x = Regexp::Parse("x", Regexp::NoParseFlags, NULL); - vector<Regexp*> v(90000, x); // ToString bails out at 100000 + std::vector<Regexp*> v(90000, x); // ToString bails out at 100000 for (size_t i = 0; i < v.size(); i++) x->Incref(); CHECK_EQ(x->Ref(), 1 + static_cast<int>(v.size())) << x->Ref(); @@ -47,11 +51,11 @@ TEST(Regexp, NamedCaptures) { "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status); EXPECT_TRUE(status.ok()); EXPECT_EQ(4, x->NumCaptures()); - const map<string, int>* have = x->NamedCaptures(); + const std::map<string, int>* have = x->NamedCaptures(); EXPECT_TRUE(have != NULL); EXPECT_EQ(2, have->size()); // there are only two named groups in // the regexp: 'g1' and 'g2'. - map<string, int> want; + std::map<string, int> want; want["g1"] = 1; want["g2"] = 3; EXPECT_EQ(want, *have); @@ -66,10 +70,10 @@ TEST(Regexp, CaptureNames) { "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status); EXPECT_TRUE(status.ok()); EXPECT_EQ(4, x->NumCaptures()); - const map<int, string>* have = x->CaptureNames(); + const std::map<int, string>* have = x->CaptureNames(); EXPECT_TRUE(have != NULL); EXPECT_EQ(3, have->size()); - map<int, string> want; + std::map<int, string> want; want[1] = "g1"; want[3] = "g2"; want[4] = "g1"; diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc index aed41f7..d535e87 100644 --- a/re2/testing/required_prefix_test.cc +++ b/re2/testing/required_prefix_test.cc @@ -2,7 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+#include <string> + #include "util/test.h" +#include "util/logging.h" #include "re2/regexp.h" namespace re2 { diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc index 08e5b4c..144233e 100644 --- a/re2/testing/search_test.cc +++ b/re2/testing/search_test.cc @@ -2,14 +2,17 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include <stdlib.h> -#include <vector> #include "util/test.h" #include "re2/prog.h" #include "re2/regexp.h" #include "re2/testing/tester.h" #include "re2/testing/exhaustive_tester.h" +// For target `log' in the Makefile. +#ifndef LOGGING +#define LOGGING 0 +#endif + namespace re2 { struct RegexpTest { @@ -314,15 +317,14 @@ TEST(Regexp, SearchTests) { if (!TestRegexpOnText(t.regexp, t.text)) failures++; -#ifdef LOGGING - // Build a dummy ExhaustiveTest call that will trigger just - // this one test, so that we log the test case. - vector<string> atom, alpha, ops; - atom.push_back(StringPiece(t.regexp).as_string()); - alpha.push_back(StringPiece(t.text).as_string()); - ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", ""); -#endif - + if (LOGGING) { + // Build a dummy ExhaustiveTest call that will trigger just + // this one test, so that we log the test case. + std::vector<string> atom, alpha, ops; + atom.push_back(StringPiece(t.regexp).ToString()); + alpha.push_back(StringPiece(t.text).ToString()); + ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", ""); + } } EXPECT_EQ(failures, 0); } diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc index c613d6a..b6a24de 100644 --- a/re2/testing/set_test.cc +++ b/re2/testing/set_test.cc @@ -2,11 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-#include <sys/types.h> -#include <sys/stat.h> +#include <stddef.h> #include <vector> #include "util/test.h" +#include "util/logging.h" #include "re2/re2.h" #include "re2/set.h" @@ -24,7 +24,7 @@ TEST(Set, Unanchored) { CHECK_EQ(s.Match("fooba", NULL), true); CHECK_EQ(s.Match("oobar", NULL), true); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("foobar", &v), true); CHECK_EQ(v.size(), 2); CHECK_EQ(v[0], 0); @@ -52,7 +52,7 @@ TEST(Set, UnanchoredFactored) { CHECK_EQ(s.Match("fooba", NULL), true); CHECK_EQ(s.Match("oobar", NULL), false); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("foobar", &v), true); CHECK_EQ(v.size(), 2); CHECK_EQ(v[0], 0); @@ -79,7 +79,7 @@ TEST(Set, UnanchoredDollar) { CHECK_EQ(s.Match("foo", NULL), true); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("foo", &v), true); CHECK_EQ(v.size(), 1); CHECK_EQ(v[0], 0); @@ -99,7 +99,7 @@ TEST(Set, Anchored) { CHECK_EQ(s.Match("foo", NULL), true); CHECK_EQ(s.Match("bar", NULL), true); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("foobar", &v), false); CHECK_EQ(v.size(), 0); @@ -126,7 +126,7 @@ TEST(Set, EmptyUnanchored) { CHECK_EQ(s.Match("", NULL), false); CHECK_EQ(s.Match("foobar", NULL), false); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("", &v), false); CHECK_EQ(v.size(), 0); @@ -142,7 +142,7 @@ TEST(Set, EmptyAnchored) { CHECK_EQ(s.Match("", NULL), false); CHECK_EQ(s.Match("foobar", NULL), false); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("", &v), false); CHECK_EQ(v.size(), 0); @@ -160,7 +160,7 @@ TEST(Set, Prefix) { CHECK_EQ(s.Match("/prefix/", NULL), true); CHECK_EQ(s.Match("/prefix/42", NULL), true); - vector<int> v; + std::vector<int> v; CHECK_EQ(s.Match("/prefix", &v), false); CHECK_EQ(v.size(), 0); diff --git a/re2/testing/simplify_test.cc b/re2/testing/simplify_test.cc index 9db41ee..33f8a8c 100644 --- a/re2/testing/simplify_test.cc +++ b/re2/testing/simplify_test.cc @@ -4,9 +4,11 @@ // Test simplify.cc. 
+#include <string.h> #include <string> -#include <vector> + #include "util/test.h" +#include "util/logging.h" #include "re2/regexp.h" namespace re2 { diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc index f96ff20..b789950 100644 --- a/re2/testing/string_generator.cc +++ b/re2/testing/string_generator.cc @@ -6,17 +6,22 @@ // maxlen letters using the set of letters in alpha. // Fetch strings using a Java-like Next()/HasNext() interface. +#include <stddef.h> +#include <stdint.h> #include <string> #include <vector> + #include "util/test.h" +#include "util/logging.h" #include "re2/testing/string_generator.h" namespace re2 { -StringGenerator::StringGenerator(int maxlen, const vector<string>& alphabet) +StringGenerator::StringGenerator(int maxlen, + const std::vector<string>& alphabet) : maxlen_(maxlen), alphabet_(alphabet), generate_null_(false), - random_(false), nrandom_(0), acm_(NULL) { + random_(false), nrandom_(0) { // Degenerate case: no letters, no non-empty strings. if (alphabet_.size() == 0) @@ -26,10 +31,6 @@ StringGenerator::StringGenerator(int maxlen, const vector<string>& alphabet) hasnext_ = true; } -StringGenerator::~StringGenerator() { - delete acm_; -} - // Resets the string generator state to the beginning. void StringGenerator::Reset() { digits_.clear(); @@ -64,11 +65,15 @@ bool StringGenerator::RandomDigits() { if (--nrandom_ <= 0) return false; + std::uniform_int_distribution<int> random_len(0, maxlen_); + std::uniform_int_distribution<int> random_alphabet_index( + 0, static_cast<int>(alphabet_.size()) - 1); + // Pick length. - int len = acm_->Uniform(maxlen_+1); + int len = random_len(rng_); digits_.resize(len); for (int i = 0; i < len; i++) - digits_[i] = acm_->Uniform(static_cast<int32>(alphabet_.size())); + digits_[i] = random_alphabet_index(rng_); return true; } @@ -93,11 +98,8 @@ const StringPiece& StringGenerator::Next() { } // Sets generator up to return n random strings. 
-void StringGenerator::Random(int32 seed, int n) { - if (acm_ == NULL) - acm_ = new ACMRandom(seed); - else - acm_->Reset(seed); +void StringGenerator::Random(int32_t seed, int n) { + rng_.seed(seed); random_ = true; nrandom_ = n; diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h index ff5a711..5a36617 100644 --- a/re2/testing/string_generator.h +++ b/re2/testing/string_generator.h @@ -9,18 +9,21 @@ // maxlen letters using the set of letters in alpha. // Fetch strings using a Java-like Next()/HasNext() interface. +#include <stdint.h> +#include <random> #include <string> #include <vector> + #include "util/util.h" -#include "util/random.h" #include "re2/stringpiece.h" namespace re2 { class StringGenerator { public: - StringGenerator(int maxlen, const vector<string>& alphabet); - ~StringGenerator(); + StringGenerator(int maxlen, const std::vector<string>& alphabet); + ~StringGenerator() {} + const StringPiece& Next(); bool HasNext() { return hasnext_; } @@ -28,7 +31,7 @@ class StringGenerator { void Reset(); // Causes generator to emit random strings for next n calls to Next(). - void Random(int32 seed, int n); + void Random(int32_t seed, int n); // Causes generator to emit a NULL as the next call. void GenerateNULL(); @@ -38,19 +41,21 @@ class StringGenerator { bool RandomDigits(); // Global state. - int maxlen_; // Maximum length string to generate. - vector<string> alphabet_; // Alphabet, one string per letter. + int maxlen_; // Maximum length string to generate. + std::vector<string> alphabet_; // Alphabet, one string per letter. // Iteration state. StringPiece sp_; // Last StringPiece returned by Next(). string s_; // String data in last StringPiece returned by Next(). bool hasnext_; // Whether Next() can be called again. - vector<int> digits_; // Alphabet indices for next string. + std::vector<int> digits_; // Alphabet indices for next string. bool generate_null_; // Whether to generate a NULL StringPiece next. 
bool random_; // Whether generated strings are random. int nrandom_; // Number of random strings left to generate. - ACMRandom* acm_; // Random number generator - DISALLOW_COPY_AND_ASSIGN(StringGenerator); + std::minstd_rand0 rng_; // Random number generator. + + StringGenerator(const StringGenerator&) = delete; + StringGenerator& operator=(const StringGenerator&) = delete; }; } // namespace re2 diff --git a/re2/testing/string_generator_test.cc b/re2/testing/string_generator_test.cc index d13401a..dcdc68a 100644 --- a/re2/testing/string_generator_test.cc +++ b/re2/testing/string_generator_test.cc @@ -4,18 +4,19 @@ // Test StringGenerator. -#include <stdlib.h> +#include <stdint.h> #include <string> -#include <vector> + #include "util/test.h" +#include "util/utf.h" #include "re2/testing/string_generator.h" #include "re2/testing/regexp_generator.h" namespace re2 { // Returns i to the e. -static int64 IntegerPower(int i, int e) { - int64 p = 1; +static int64_t IntegerPower(int i, int e) { + int64_t p = 1; while (e-- > 0) p *= i; return p; @@ -46,7 +47,7 @@ static void RunTest(int len, string alphabet, bool donull) { } while (g.HasNext()) { - string s = g.Next().as_string(); + string s = g.Next().ToString(); n++; // Check that all characters in s appear in alphabet. @@ -69,7 +70,7 @@ static void RunTest(int len, string alphabet, bool donull) { } // Check total string count. - int64 m = 0; + int64_t m = 0; int alpha = utflen(alphabet.c_str()); if (alpha == 0) // Degenerate case. len = 0; diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc index 62cbb6c..59779f7 100644 --- a/re2/testing/tester.cc +++ b/re2/testing/tester.cc @@ -4,8 +4,16 @@ // Regular expression engine tester -- test all the implementations against each other. 
+#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <string> + #include "util/util.h" #include "util/flags.h" +#include "util/logging.h" +#include "util/strutil.h" #include "re2/testing/tester.h" #include "re2/prog.h" #include "re2/re2.h" @@ -40,7 +48,7 @@ const char* engine_names[kEngineMax] = { }; // Returns the name of the engine. -static StringPiece EngineName(Engine e) { +static const char* EngineName(Engine e) { CHECK_GE(e, 0); CHECK_LT(e, arraysize(engine_names)); CHECK(engine_names[e] != NULL); @@ -48,9 +56,9 @@ static StringPiece EngineName(Engine e) { } // Returns bit mask of engines to use. -static uint32 Engines() { +static uint32_t Engines() { static bool did_parse = false; - static uint32 cached_engines = 0; + static uint32_t cached_engines = 0; if (did_parse) return cached_engines; @@ -59,7 +67,7 @@ static uint32 Engines() { cached_engines = ~0; } else { for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) - if (StringPiece(FLAGS_regexp_engines).contains(EngineName(i))) + if (FLAGS_regexp_engines.find(EngineName(i)) != string::npos) cached_engines |= 1<<i; } @@ -93,15 +101,14 @@ typedef TestInstance::Result Result; static string FormatCapture(const StringPiece& text, const StringPiece& s) { if (s.begin() == NULL) return "(?,?)"; - return StringPrintf("(%d,%d)", - static_cast<int>(s.begin() - text.begin()), - static_cast<int>(s.end() - text.begin())); + return StringPrintf("(%td,%td)", + s.begin() - text.begin(), s.end() - text.begin()); } // Returns whether text contains non-ASCII (>= 0x80) bytes. 
static bool NonASCII(const StringPiece& text) { - for (int i = 0; i < text.size(); i++) - if ((uint8)text[i] >= 0x80) + for (size_t i = 0; i < text.size(); i++) + if ((uint8_t)text[i] >= 0x80) return true; return false; } @@ -154,7 +161,7 @@ static string FormatMode(Regexp::ParseFlags flags) { for (int i = 0; i < arraysize(parse_modes); i++) if (parse_modes[i].parse_flags == flags) return parse_modes[i].desc; - return StringPrintf("%#x", static_cast<uint>(flags)); + return StringPrintf("%#x", static_cast<uint32_t>(flags)); } // Constructs and saves all the matching engines that @@ -214,7 +221,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind, } // Create re string that will be used for RE and RE2. - string re = regexp_str.as_string(); + string re = regexp_str.ToString(); // Accomodate flags. // Regexp::Latin1 will be accomodated below. if (!(flags & Regexp::OneLine)) @@ -395,8 +402,8 @@ void TestInstance::RunSearch(Engine type, result->matched = re2_->Match( context, - static_cast<int>(text.begin() - context.begin()), - static_cast<int>(text.end() - context.begin()), + static_cast<size_t>(text.begin() - context.begin()), + static_cast<size_t>(text.end() - context.begin()), re_anchor, result->submatch, nsubmatch); @@ -411,10 +418,23 @@ void TestInstance::RunSearch(Engine type, break; } + // In Perl/PCRE, \v matches any character considered vertical + // whitespace, not just vertical tab. Regexp::MimicsPCRE() is + // unable to handle all cases of this, unfortunately, so just + // catch them here. :( + if (regexp_str_.find("\\v") != StringPiece::npos && + (text.find('\n') != StringPiece::npos || + text.find('\f') != StringPiece::npos || + text.find('\r') != StringPiece::npos)) { + result->skipped = true; + break; + } + // PCRE 8.34 or so started allowing vertical tab to match \s, // following a change made in Perl 5.18. RE2 does not. 
- if ((regexp_str_.contains("\\s") || regexp_str_.contains("\\S")) && - text.contains("\v")) { + if ((regexp_str_.find("\\s") != StringPiece::npos || + regexp_str_.find("\\S") != StringPiece::npos) && + text.find('\v') != StringPiece::npos) { result->skipped = true; break; } @@ -425,7 +445,7 @@ void TestInstance::RunSearch(Engine type, a[i] = PCRE::Arg(&result->submatch[i]); argptr[i] = &a[i]; } - int consumed; + size_t consumed; PCRE::Anchor pcre_anchor; if (anchor == Prog::kAnchored) pcre_anchor = PCRE::ANCHOR_START; diff --git a/re2/testing/tester.h b/re2/testing/tester.h index 07291d2..112c6ec 100644 --- a/re2/testing/tester.h +++ b/re2/testing/tester.h @@ -8,6 +8,8 @@ // Comparative tester for regular expression matching. // Checks all implementations against each other. +#include <vector> + #include "re2/stringpiece.h" #include "re2/prog.h" #include "re2/regexp.h" @@ -16,8 +18,6 @@ namespace re2 { -class Regexp; - // All the supported regexp engines. enum Engine { kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack @@ -84,7 +84,8 @@ class TestInstance { PCRE* re_; // PCRE implementation RE2* re2_; // RE2 implementation - DISALLOW_COPY_AND_ASSIGN(TestInstance); + TestInstance(const TestInstance&) = delete; + TestInstance& operator=(const TestInstance&) = delete; }; // A group of TestInstances for all possible configurations. @@ -108,9 +109,10 @@ class Tester { private: bool error_; - vector<TestInstance*> v_; + std::vector<TestInstance*> v_; - DISALLOW_COPY_AND_ASSIGN(Tester); + Tester(const Tester&) = delete; + Tester& operator=(const Tester&) = delete; }; // Run all possible tests using regexp and text. diff --git a/re2/tostring.cc b/re2/tostring.cc index 0230c8c..fc9faca 100644 --- a/re2/tostring.cc +++ b/re2/tostring.cc @@ -5,7 +5,13 @@ // Format a regular expression structure as a string. 
// Tested by parse_test.cc +#include <string.h> +#include <string> + #include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -42,7 +48,8 @@ class ToStringWalker : public Regexp::Walker<int> { private: string* t_; // The string the walker appends to. - DISALLOW_COPY_AND_ASSIGN(ToStringWalker); + ToStringWalker(const ToStringWalker&) = delete; + ToStringWalker& operator=(const ToStringWalker&) = delete; }; string Regexp::ToString() { diff --git a/re2/unicode_casefold.h b/re2/unicode_casefold.h index 164ca41..8bdbb42 100644 --- a/re2/unicode_casefold.h +++ b/re2/unicode_casefold.h @@ -19,7 +19,7 @@ // 'K' -> 'K' // // Like everything Unicode, these tables are big. If we represent the table -// as a sorted list of uint32 pairs, it has 2049 entries and is 16 kB. +// as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB. // Most table entries look like the ones around them: // 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc. // Instead of listing all the pairs explicitly, we make a list of ranges @@ -39,7 +39,10 @@ // The grouped form also allows for efficient fold range calculations // rather than looping one character at a time. +#include <stdint.h> + #include "util/util.h" +#include "util/utf.h" namespace re2 { @@ -53,7 +56,7 @@ enum { struct CaseFold { Rune lo; Rune hi; - int32 delta; + int32_t delta; }; extern const CaseFold unicode_casefold[]; diff --git a/re2/unicode_groups.h b/re2/unicode_groups.h index d61cd83..75f55da 100644 --- a/re2/unicode_groups.h +++ b/re2/unicode_groups.h @@ -18,14 +18,17 @@ // to 16.5 kB of data but make the data harder to use; // we don't bother. 
+#include <stdint.h> + #include "util/util.h" +#include "util/utf.h" namespace re2 { struct URange16 { - uint16 lo; - uint16 hi; + uint16_t lo; + uint16_t hi; }; struct URange32 diff --git a/re2/walker-inl.h b/re2/walker-inl.h index 6a1113a..032b8ac 100644 --- a/re2/walker-inl.h +++ b/re2/walker-inl.h @@ -13,6 +13,9 @@ // Not quite the Visitor pattern, because (among other things) // the Visitor pattern is recursive. +#include <stack> + +#include "util/logging.h" #include "re2/regexp.h" namespace re2 { @@ -86,13 +89,14 @@ template<typename T> class Regexp::Walker { private: // Walk state for the entire traversal. - stack<WalkState<T> >* stack_; + std::stack<WalkState<T> >* stack_; bool stopped_early_; int max_visits_; T WalkInternal(Regexp* re, T top_arg, bool use_copy); - DISALLOW_COPY_AND_ASSIGN(Walker); + Walker(const Walker&) = delete; + Walker& operator=(const Walker&) = delete; }; template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re, @@ -130,7 +134,7 @@ template<typename T> struct WalkState { }; template<typename T> Regexp::Walker<T>::Walker() { - stack_ = new stack<WalkState<T> >; + stack_ = new std::stack<WalkState<T> >; stopped_early_ = false; } diff --git a/testinstall.cc b/testinstall.cc index 97990c2..47db4e6 100644 --- a/testinstall.cc +++ b/testinstall.cc @@ -6,15 +6,13 @@ #include <re2/filtered_re2.h> #include <stdio.h> -using namespace re2; - int main(void) { - FilteredRE2 f; + re2::FilteredRE2 f; int id; f.Add("a.*b.*c", RE2::DefaultOptions, &id); - vector<string> v; + std::vector<std::string> v; f.Compile(&v); - vector<int> ids; + std::vector<int> ids; f.FirstMatch("abbccc", ids); if(RE2::FullMatch("axbyc", "a.*b.*c")) { diff --git a/util/benchmark.cc b/util/benchmark.cc index 20b6765..5ca715a 100644 --- a/util/benchmark.cc +++ b/util/benchmark.cc @@ -2,6 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+#include <stdint.h> +#include <stdio.h> +#include <algorithm> +#include <chrono> + #include "util/util.h" #include "util/flags.h" #include "util/benchmark.h" @@ -9,8 +14,11 @@ DEFINE_string(test_tmpdir, "/var/tmp", "temp directory"); +#ifdef _WIN32 +#define snprintf _snprintf +#endif + using testing::Benchmark; -using namespace re2; static Benchmark* benchmarks[10000]; static int nbenchmarks; @@ -24,42 +32,17 @@ void Benchmark::Register() { nbenchmarks++; } -static int64 nsec() { -#if defined(__APPLE__) - struct timeval tv; - if(gettimeofday(&tv, 0) < 0) - return -1; - return (int64)tv.tv_sec*1000*1000*1000 + tv.tv_usec*1000; -#elif defined(_WIN32) - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408.aspx - // describes how to query ticks and convert to microseconds. Of course, - // what we want in this case are nanoseconds. Also, note that .QuadPart - // is a signed 64-bit integer, so casting to int64 shouldn't be needed. - LARGE_INTEGER freq; - QueryPerformanceFrequency(&freq); - LARGE_INTEGER ticks; - QueryPerformanceCounter(&ticks); - ticks.QuadPart *= 1000*1000*1000; - ticks.QuadPart /= freq.QuadPart; - return ticks.QuadPart; -#else - struct timespec tp; -#ifdef CLOCK_PROCESS_CPUTIME_ID - if(clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tp) < 0) -#else - if(clock_gettime(CLOCK_REALTIME, &tp) < 0) -#endif - return -1; - return (int64)tp.tv_sec*1000*1000*1000 + tp.tv_nsec; -#endif +static int64_t nsec() { + return std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now().time_since_epoch()).count(); } -static int64 bytes; -static int64 ns; -static int64 t0; -static int64 items; +static int64_t bytes; +static int64_t ns; +static int64_t t0; +static int64_t items; -void SetBenchmarkBytesProcessed(long long x) { +void SetBenchmarkBytesProcessed(int64_t x) { bytes = x; } @@ -132,7 +115,7 @@ void RunBench(Benchmark* b, int nthread, int siz) { else n = (int)1e9 / static_cast<int>(ns/n); - n = max(last+1, min(n+n/2, 
100*last)); + n = std::max(last+1, std::min(n+n/2, 100*last)); n = round(n); runN(b, n, siz); } @@ -169,7 +152,7 @@ int main(int argc, const char** argv) { Benchmark* b = benchmarks[i]; if(match(b->name, argc, argv)) for(int j = b->threadlo; j <= b->threadhi; j++) - for(int k = max(b->lo, 1); k <= max(b->hi, 1); k<<=1) + for(int k = std::max(b->lo, 1); k <= std::max(b->hi, 1); k<<=1) RunBench(b, j, k); } } diff --git a/util/benchmark.h b/util/benchmark.h index 694565f..fba30b9 100644 --- a/util/benchmark.h +++ b/util/benchmark.h @@ -5,6 +5,8 @@ #ifndef UTIL_BENCHMARK_H_ #define UTIL_BENCHMARK_H_ +#include <stdint.h> + namespace testing { struct Benchmark { const char* name; @@ -23,7 +25,7 @@ struct Benchmark { }; } // namespace testing -void SetBenchmarkBytesProcessed(long long); +void SetBenchmarkBytesProcessed(int64_t); void StopBenchmarkTiming(); void StartBenchmarkTiming(); void BenchmarkMemoryUsage(); diff --git a/util/flags.h b/util/flags.h index 1fd5c91..5af1320 100644 --- a/util/flags.h +++ b/util/flags.h @@ -10,6 +10,8 @@ // If you want to do that, see // https://gflags.github.io/gflags/ +#include <stdint.h> + #define DEFINE_flag(type, name, deflt, desc) \ namespace re2 { type FLAGS_##name = deflt; } @@ -17,11 +19,11 @@ namespace re2 { extern type FLAGS_##name; } #define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc) -#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32, name, deflt, desc) +#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32_t, name, deflt, desc) #define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc) #define DECLARE_bool(name) DECLARE_flag(bool, name) -#define DECLARE_int32(name) DECLARE_flag(int32, name) +#define DECLARE_int32(name) DECLARE_flag(int32_t, name) #define DECLARE_string(name) DECLARE_flag(string, name) #endif // UTIL_FLAGS_H_ diff --git a/util/hash.cc b/util/hash.cc deleted file mode 100644 index dfef7b7..0000000 --- a/util/hash.cc +++ /dev/null @@ -1,231 +0,0 @@ -// 
Modified by Russ Cox to add "namespace re2". -// Also threw away all but hashword and hashword2. -// http://burtleburtle.net/bob/c/lookup3.c - -/* -------------------------------------------------------------------------------- -lookup3.c, by Bob Jenkins, May 2006, Public Domain. - -These are functions for producing 32-bit hashes for hash table lookup. -hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() -are externally useful functions. Routines to test the hash are included -if SELF_TEST is defined. You can use this free for any purpose. It's in -the public domain. It has no warranty. - -You probably want to use hashlittle(). hashlittle() and hashbig() -hash byte arrays. hashlittle() is is faster than hashbig() on -little-endian machines. Intel and AMD are little-endian machines. -On second thought, you probably want hashlittle2(), which is identical to -hashlittle() except it returns two 32-bit hashes for the price of one. -You could implement hashbig2() if you wanted but I haven't bothered here. - -If you want to find a hash of, say, exactly 7 integers, do - a = i1; b = i2; c = i3; - mix(a,b,c); - a += i4; b += i5; c += i6; - mix(a,b,c); - a += i7; - final(a,b,c); -then use c as the hash value. If you have a variable length array of -4-byte integers to hash, use hashword(). If you have a byte array (like -a character string), use hashlittle(). If you have several byte arrays, or -a mix of things, see the comments above hashlittle(). - -Why is this so big? I read 12 bytes at a time into 3 4-byte integers, -then mix those integers. This is fast (you can do a lot more thorough -mixing with 12*3 instructions on 3 integers than you can with 3 instructions -on 1 byte), but shoehorning those bytes into integers efficiently is messy. 
-------------------------------------------------------------------------------- -*/ - -#include "util/util.h" - -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) - -/* -------------------------------------------------------------------------------- -mix -- mix 3 32-bit values reversibly. - -This is reversible, so any information in (a,b,c) before mix() is -still in (a,b,c) after mix(). - -If four pairs of (a,b,c) inputs are run through mix(), or through -mix() in reverse, there are at least 32 bits of the output that -are sometimes the same for one pair and different for another pair. -This was tested for: -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. - -Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that -satisfy this are - 4 6 8 16 19 4 - 9 15 3 18 27 15 - 14 9 3 7 17 3 -Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing -for "differ" defined as + with a one-bit base and a two-bit delta. I -used http://burtleburtle.net/bob/hash/avalanche.html to choose -the operations, constants, and arrangements of the variables. - -This does not achieve avalanche. There are input bits of (a,b,c) -that fail to affect some output bits of (a,b,c), especially of a. The -most thoroughly mixed value is c, but it doesn't really even achieve -avalanche in c. - -This allows some parallelism. Read-after-writes are good at doubling -the number of bits affected, so the goal of mixing pulls in the opposite -direction as the goal of parallelism. I did what I could. 
Rotates -seem to cost as much as shifts on every machine I could lay my hands -on, and rotates are much kinder to the top and bottom bits, so I used -rotates. -------------------------------------------------------------------------------- -*/ -#define mix(a,b,c) \ -{ \ - a -= c; a ^= rot(c, 4); c += b; \ - b -= a; b ^= rot(a, 6); a += c; \ - c -= b; c ^= rot(b, 8); b += a; \ - a -= c; a ^= rot(c,16); c += b; \ - b -= a; b ^= rot(a,19); a += c; \ - c -= b; c ^= rot(b, 4); b += a; \ -} - -/* -------------------------------------------------------------------------------- -final -- final mixing of 3 32-bit values (a,b,c) into c - -Pairs of (a,b,c) values differing in only a few bits will usually -produce values of c that look totally different. This was tested for -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. - -These constants passed: - 14 11 25 16 4 14 24 - 12 14 25 16 4 14 24 -and these came close: - 4 8 15 26 3 22 24 - 10 8 15 26 3 22 24 - 11 8 15 26 3 22 24 -------------------------------------------------------------------------------- -*/ -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - -namespace re2 { - -/* --------------------------------------------------------------------- - This works on all machines. 
To be useful, it requires - -- that the key be an array of uint32_t's, and - -- that the length be the number of uint32_t's in the key - - The function hashword() is identical to hashlittle() on little-endian - machines, and identical to hashbig() on big-endian machines, - except that the length has to be measured in uint32_ts rather than in - bytes. hashlittle() is more complicated than hashword() only because - hashlittle() has to dance around fitting the key bytes into registers. --------------------------------------------------------------------- -*/ -uint32 hashword( -const uint32 *k, /* the key, an array of uint32_t values */ -size_t length, /* the length of the key, in uint32_ts */ -uint32 initval) /* the previous hash, or an arbitrary value */ -{ - uint32_t a,b,c; - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; - - /*------------------------------------------------- handle most of the key */ - while (length > 3) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 3; - k += 3; - } - - /*------------------------------------------- handle the last 3 uint32_t's */ - switch(length) /* all the case statements fall through */ - { - case 3 : c+=k[2]; - case 2 : b+=k[1]; - case 1 : a+=k[0]; - final(a,b,c); - case 0: /* case 0: nothing left to add */ - break; - } - /*------------------------------------------------------ report the result */ - return c; -} - - -/* --------------------------------------------------------------------- -hashword2() -- same as hashword(), but take two seeds and return two -32-bit values. pc and pb must both be nonnull, and *pc and *pb must -both be initialized with seeds. If you pass in (*pb)==0, the output -(*pc) will be the same as the return value from hashword(). 
--------------------------------------------------------------------- -*/ -void hashword2 ( -const uint32 *k, /* the key, an array of uint32_t values */ -size_t length, /* the length of the key, in uint32_ts */ -uint32 *pc, /* IN: seed OUT: primary hash value */ -uint32 *pb) /* IN: more seed OUT: secondary hash value */ -{ - uint32_t a,b,c; - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; - c += *pb; - - /*------------------------------------------------- handle most of the key */ - while (length > 3) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 3; - k += 3; - } - - /*------------------------------------------- handle the last 3 uint32_t's */ - switch(length) /* all the case statements fall through */ - { - case 3 : c+=k[2]; - case 2 : b+=k[1]; - case 1 : a+=k[0]; - final(a,b,c); - case 0: /* case 0: nothing left to add */ - break; - } - /*------------------------------------------------------ report the result */ - *pc=c; *pb=b; -} - -} // namespace re2 diff --git a/util/logging.h b/util/logging.h index 1573b18..d1044b3 100644 --- a/util/logging.h +++ b/util/logging.h @@ -7,7 +7,9 @@ // Simplified version of Google's logging. +#include <assert.h> #include <stdio.h> /* for fwrite */ +#include <ostream> #include <sstream> #include "util/util.h" @@ -45,10 +47,8 @@ DECLARE_int32(minloglevel); #endif #ifdef NDEBUG -#define DEBUG_MODE 0 #define LOG_DFATAL LOG_ERROR #else -#define DEBUG_MODE 1 #define LOG_DFATAL LOG_FATAL #endif @@ -76,18 +76,22 @@ class LogMessage { Flush(); } } - ostream& stream() { return str_; } + std::ostream& stream() { return str_; } private: const int severity_; bool flushed_; std::ostringstream str_; - DISALLOW_COPY_AND_ASSIGN(LogMessage); + + LogMessage(const LogMessage&) = delete; + LogMessage& operator=(const LogMessage&) = delete; }; -#ifdef _WIN32 +// Silence "destructor never returns" warning for ~LogMessageFatal(). 
+// Since this is a header file, push and then pop to limit the scope. +#ifdef _MSC_VER #pragma warning(push) -#pragma warning(disable: 4722) // destructor never returns +#pragma warning(disable: 4722) #endif class LogMessageFatal : public LogMessage { @@ -99,10 +103,11 @@ class LogMessageFatal : public LogMessage { abort(); } private: - DISALLOW_COPY_AND_ASSIGN(LogMessageFatal); + LogMessageFatal(const LogMessageFatal&) = delete; + LogMessageFatal& operator=(const LogMessageFatal&) = delete; }; -#ifdef _WIN32 +#ifdef _MSC_VER #pragma warning(pop) #endif diff --git a/util/mix.h b/util/mix.h new file mode 100644 index 0000000..b36417c --- /dev/null +++ b/util/mix.h @@ -0,0 +1,30 @@ +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_MIX_H_ +#define UTIL_MIX_H_ + +#include <stddef.h> +#include <limits> + +namespace re2 { + +class HashMix { + public: + HashMix() : hash_(1) {} + explicit HashMix(size_t val) : hash_(val + 83) {} + void Mix(size_t val) { + static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL); + hash_ *= kMul; + hash_ = ((hash_ << 19) | + (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val; + } + size_t get() const { return hash_; } + private: + size_t hash_; +}; + +} // namespace re2 + +#endif // UTIL_MIX_H_ diff --git a/util/mutex.h b/util/mutex.h index 81121a4..9c49158 100644 --- a/util/mutex.h +++ b/util/mutex.h @@ -10,79 +10,33 @@ * You should assume the locks are *not* re-entrant. 
*/ -#include <stdlib.h> - #if !defined(_WIN32) -#include <unistd.h> // For POSIX options +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif +#include <unistd.h> +#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0 +#define MUTEX_IS_PTHREAD_RWLOCK #endif - -namespace re2 { - -#if !defined(_WIN32) - // Possible values of POSIX options: - // -1 means not supported, - // 0 means maybe supported (query at runtime), - // >0 means supported. -# if defined(_POSIX_THREADS) && _POSIX_THREADS > 0 -# define HAVE_PTHREAD 1 -# else -# define HAVE_PTHREAD 0 -# endif -# if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0 -# define HAVE_RWLOCK 1 -# else -# define HAVE_RWLOCK 0 -# endif -#else -# define HAVE_PTHREAD 0 -# define HAVE_RWLOCK 0 #endif -#if defined(NO_THREADS) - typedef int MutexType; // to keep a lock-count -#elif HAVE_PTHREAD && HAVE_RWLOCK - // Needed for pthread_rwlock_*. If it causes problems, you could take it - // out, but then you'd have to set HAVE_RWLOCK to 0 (at least on linux -- - // it *does* cause problems for FreeBSD, or MacOSX, but isn't needed - // for locking there.) -# ifdef __linux__ -# undef _XOPEN_SOURCE -# define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls -# endif -# include <pthread.h> - typedef pthread_rwlock_t MutexType; -#elif HAVE_PTHREAD -# include <pthread.h> - typedef pthread_mutex_t MutexType; -#elif defined(_WIN32) -# ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN // We only need minimal includes -# endif -# ifdef GMUTEX_TRYLOCK - // We need Windows NT or later for TryEnterCriticalSection(). If you - // don't need that functionality, you can remove these _WIN32_WINNT - // lines, and change TryLock() to assert(0) or something. 
-# ifndef _WIN32_WINNT -# define _WIN32_WINNT 0x0400 -# endif -# endif -# include <windows.h> - typedef CRITICAL_SECTION MutexType; +#if defined(MUTEX_IS_PTHREAD_RWLOCK) +#include <pthread.h> +#include <stdlib.h> +typedef pthread_rwlock_t MutexType; #else -# error Need to implement mutex.h for your architecture, or #define NO_THREADS +#include <mutex> +typedef std::mutex MutexType; #endif +namespace re2 { + class Mutex { public: - // Create a Mutex that is not held by anybody. inline Mutex(); - - // Destructor inline ~Mutex(); - inline void Lock(); // Block if needed until free then acquire exclusively inline void Unlock(); // Release a lock acquired via Lock() - inline bool TryLock(); // If free, Lock() and return true, else return false // Note that on systems that don't support read-write locks, these may // be implemented as synonyms to Lock() and Unlock(). So you can use // these for efficiency, but don't use them anyplace where being able @@ -91,80 +45,44 @@ class Mutex { inline void ReaderUnlock(); // Release a read share of this Mutex inline void WriterLock() { Lock(); } // Acquire an exclusive lock inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() - inline void AssertHeld() { } private: MutexType mutex_; // Catch the error of writing Mutex when intending MutexLock. Mutex(Mutex *ignored); - // Disallow "evil" constructors - Mutex(const Mutex&); - void operator=(const Mutex&); -}; -// Now the implementation of Mutex for various systems -#if defined(NO_THREADS) - -// When we don't have threads, we can be either reading or writing, -// but not both. We can have lots of readers at once (in no-threads -// mode, that's most likely to happen in recursive function calls), -// but only one writer. We represent this by having mutex_ be -1 when -// writing and a number > 0 when reading (and 0 when no lock is held). -// -// In debug mode, we assert these invariants, while in non-debug mode -// we do nothing, for efficiency. 
That's why everything is in an -// assert. -#include <assert.h> - -Mutex::Mutex() : mutex_(0) { } -Mutex::~Mutex() { assert(mutex_ == 0); } -void Mutex::Lock() { assert(--mutex_ == -1); } -void Mutex::Unlock() { assert(mutex_++ == -1); } -bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } -void Mutex::ReaderLock() { assert(++mutex_ > 0); } -void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } + Mutex(const Mutex&) = delete; + Mutex& operator=(const Mutex&) = delete; +}; -#elif HAVE_PTHREAD && HAVE_RWLOCK +#if defined(MUTEX_IS_PTHREAD_RWLOCK) -#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) +#define SAFE_PTHREAD(fncall) \ + do { \ + if ((fncall) != 0) abort(); \ + } while (0) Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); } Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); } void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); } void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } -bool Mutex::TryLock() { return pthread_rwlock_trywrlock(&mutex_) == 0; } void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); } void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } #undef SAFE_PTHREAD -#elif HAVE_PTHREAD - -#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) - -Mutex::Mutex() { SAFE_PTHREAD(pthread_mutex_init(&mutex_, NULL)); } -Mutex::~Mutex() { SAFE_PTHREAD(pthread_mutex_destroy(&mutex_)); } -void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock(&mutex_)); } -void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock(&mutex_)); } -bool Mutex::TryLock() { return pthread_mutex_trylock(&mutex_) == 0; } -void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks -void Mutex::ReaderUnlock() { Unlock(); } -#undef SAFE_PTHREAD - -#elif defined(_WIN32) +#else -Mutex::Mutex() { InitializeCriticalSection(&mutex_); } -Mutex::~Mutex() { DeleteCriticalSection(&mutex_); } -void Mutex::Lock() { 
EnterCriticalSection(&mutex_); } -void Mutex::Unlock() { LeaveCriticalSection(&mutex_); } -bool Mutex::TryLock() { return TryEnterCriticalSection(&mutex_) != 0; } -void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks +Mutex::Mutex() { } +Mutex::~Mutex() { } +void Mutex::Lock() { mutex_.lock(); } +void Mutex::Unlock() { mutex_.unlock(); } +void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex. void Mutex::ReaderUnlock() { Unlock(); } #endif - // -------------------------------------------------------------------------- // Some helper classes @@ -175,9 +93,9 @@ class MutexLock { ~MutexLock() { mu_->Unlock(); } private: Mutex * const mu_; - // Disallow "evil" constructors - MutexLock(const MutexLock&); - void operator=(const MutexLock&); + + MutexLock(const MutexLock&) = delete; + MutexLock& operator=(const MutexLock&) = delete; }; // ReaderMutexLock and WriterMutexLock do the same, for rwlocks @@ -187,9 +105,9 @@ class ReaderMutexLock { ~ReaderMutexLock() { mu_->ReaderUnlock(); } private: Mutex * const mu_; - // Disallow "evil" constructors - ReaderMutexLock(const ReaderMutexLock&); - void operator=(const ReaderMutexLock&); + + ReaderMutexLock(const ReaderMutexLock&) = delete; + ReaderMutexLock& operator=(const ReaderMutexLock&) = delete; }; class WriterMutexLock { @@ -198,15 +116,15 @@ class WriterMutexLock { ~WriterMutexLock() { mu_->WriterUnlock(); } private: Mutex * const mu_; - // Disallow "evil" constructors - WriterMutexLock(const WriterMutexLock&); - void operator=(const WriterMutexLock&); + + WriterMutexLock(const WriterMutexLock&) = delete; + WriterMutexLock& operator=(const WriterMutexLock&) = delete; }; // Catch bug where variable name is omitted, e.g. 
MutexLock (&mu); -#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name) -#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) -#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) +#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name") +#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name") +#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name") } // namespace re2 diff --git a/util/pcre.cc b/util/pcre.cc index 87affdc..2d0f5df 100644 --- a/util/pcre.cc +++ b/util/pcre.cc @@ -6,11 +6,20 @@ // The main changes are the addition of the HitLimit method and // compilation as PCRE in namespace re2. +#include <assert.h> +#include <ctype.h> #include <errno.h> +#include <stdlib.h> +#include <string.h> #include <limits> +#include <string> +#include <utility> + #include "util/util.h" #include "util/flags.h" +#include "util/logging.h" #include "util/pcre.h" +#include "util/strutil.h" #define PCREPORT(level) LOG(level) @@ -214,7 +223,7 @@ bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text, if (&a15 == &no_more_args) goto done; args[n++] = &a15; done: - int consumed; + size_t consumed; int vec[kVecSize] = {}; return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize); } @@ -257,7 +266,7 @@ bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text, if (&a15 == &no_more_args) goto done; args[n++] = &a15; done: - int consumed; + size_t consumed; int vec[kVecSize] = {}; return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize); } @@ -300,7 +309,7 @@ bool PCRE::ConsumeFunctor::operator ()(StringPiece* input, if (&a15 == &no_more_args) goto done; args[n++] = &a15; done: - int consumed; + size_t consumed; int vec[kVecSize] = {}; if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed, args, n, vec, kVecSize)) { @@ -349,7 +358,7 @@ 
bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input, if (&a15 == &no_more_args) goto done; args[n++] = &a15; done: - int consumed; + size_t consumed; int vec[kVecSize] = {}; if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed, args, n, vec, kVecSize)) { @@ -384,10 +393,10 @@ int PCRE::GlobalReplace(string *str, int count = 0; int vec[kVecSize] = {}; string out; - int start = 0; + size_t start = 0; bool last_match_was_empty_string = false; - while (start <= static_cast<int>(str->size())) { + while (start <= str->size()) { // If the previous match was for the empty string, we shouldn't // just match again: we'll match in the same way and get an // infinite loop. Instead, we do the match in a special way: @@ -403,7 +412,7 @@ int PCRE::GlobalReplace(string *str, matches = pattern.TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize); if (matches <= 0) { - if (start < static_cast<int>(str->size())) + if (start < str->size()) out.push_back((*str)[start]); start++; last_match_was_empty_string = false; @@ -415,7 +424,7 @@ int PCRE::GlobalReplace(string *str, if (matches <= 0) break; } - int matchstart = vec[0], matchend = vec[1]; + size_t matchstart = vec[0], matchend = vec[1]; assert(matchstart >= start); assert(matchend >= matchstart); @@ -429,8 +438,9 @@ int PCRE::GlobalReplace(string *str, if (count == 0) return 0; - if (start < static_cast<int>(str->size())) - out.append(*str, start, static_cast<int>(str->size()) - start); + if (start < str->size()) + out.append(*str, start, str->size() - start); + using std::swap; swap(out, *str); return count; } @@ -458,7 +468,7 @@ string PCRE::QuoteMeta(const StringPiece& unquoted) { // that. (This also makes it identical to the perl function of the // same name except for the null-character special case; // see `perldoc -f quotemeta`.) 
- for (int ii = 0; ii < unquoted.length(); ++ii) { + for (size_t ii = 0; ii < unquoted.size(); ++ii) { // Note that using 'isalnum' here raises the benchmark time from // 32ns to 58ns: if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && @@ -493,11 +503,11 @@ void PCRE::ClearHitLimit() { } int PCRE::TryMatch(const StringPiece& text, - int startpos, - Anchor anchor, - bool empty_ok, - int *vec, - int vecsize) const { + size_t startpos, + Anchor anchor, + bool empty_ok, + int *vec, + int vecsize) const { pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; if (re == NULL) { PCREPORT(ERROR) << "Matching against invalid re: " << *error_; @@ -533,8 +543,8 @@ int PCRE::TryMatch(const StringPiece& text, int rc = pcre_exec(re, // The regular expression object &extra, (text.data() == NULL) ? "" : text.data(), - text.size(), - startpos, + static_cast<int>(text.size()), + static_cast<int>(startpos), options, vec, vecsize); @@ -589,12 +599,12 @@ int PCRE::TryMatch(const StringPiece& text, } bool PCRE::DoMatchImpl(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const* args, - int n, - int* vec, - int vecsize) const { + Anchor anchor, + size_t* consumed, + const Arg* const* args, + int n, + int* vec, + int vecsize) const { assert((1 + n) * 3 <= vecsize); // results + PCRE workspace int matches = TryMatch(text, 0, anchor, true, vec, vecsize); assert(matches >= 0); // TryMatch never returns negatives @@ -628,10 +638,10 @@ bool PCRE::DoMatchImpl(const StringPiece& text, } bool PCRE::DoMatch(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const args[], - int n) const { + Anchor anchor, + size_t* consumed, + const Arg* const args[], + int n) const { assert(n >= 0); const int vecsize = (1 + n) * 3; // results + PCRE workspace // (as for kVecSize) @@ -730,38 +740,38 @@ int PCRE::NumberOfCapturingGroups() const { /***** Parsers for various types *****/ -bool PCRE::Arg::parse_null(const char* str, int n, void* dest) { +bool 
PCRE::Arg::parse_null(const char* str, size_t n, void* dest) { // We fail if somebody asked us to store into a non-NULL void* pointer return (dest == NULL); } -bool PCRE::Arg::parse_string(const char* str, int n, void* dest) { +bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) { if (dest == NULL) return true; reinterpret_cast<string*>(dest)->assign(str, n); return true; } -bool PCRE::Arg::parse_stringpiece(const char* str, int n, void* dest) { +bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) { if (dest == NULL) return true; - reinterpret_cast<StringPiece*>(dest)->set(str, n); + *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n); return true; } -bool PCRE::Arg::parse_char(const char* str, int n, void* dest) { +bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) { if (n != 1) return false; if (dest == NULL) return true; *(reinterpret_cast<char*>(dest)) = str[0]; return true; } -bool PCRE::Arg::parse_schar(const char* str, int n, void* dest) { +bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) { if (n != 1) return false; if (dest == NULL) return true; *(reinterpret_cast<signed char*>(dest)) = str[0]; return true; } -bool PCRE::Arg::parse_uchar(const char* str, int n, void* dest) { +bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) { if (n != 1) return false; if (dest == NULL) return true; *(reinterpret_cast<unsigned char*>(dest)) = str[0]; @@ -778,7 +788,7 @@ static const int kMaxNumberLength = 32; // a. "str" if no termination is needed // b. "buf" if the string was copied and null-terminated // c. "" if the input was invalid and has no hope of being parsed -static const char* TerminateNumber(char* buf, const char* str, int n) { +static const char* TerminateNumber(char* buf, const char* str, size_t n) { if ((n > 0) && isspace(*str)) { // We are less forgiving than the strtoxxx() routines and do not // allow leading spaces. 
@@ -801,9 +811,9 @@ static const char* TerminateNumber(char* buf, const char* str, int n) { } bool PCRE::Arg::parse_long_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, str, n); @@ -818,16 +828,16 @@ bool PCRE::Arg::parse_long_radix(const char* str, } bool PCRE::Arg::parse_ulong_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, str, n); if (str[0] == '-') { - // strtoul() will silently accept negative numbers and parse - // them. This module is more strict and treats them as errors. - return false; + // strtoul() will silently accept negative numbers and parse + // them. This module is more strict and treats them as errors. + return false; } char* end; @@ -841,9 +851,9 @@ bool PCRE::Arg::parse_ulong_radix(const char* str, } bool PCRE::Arg::parse_short_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { long r; if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse if ((short)r != r) return false; // Out of range @@ -853,9 +863,9 @@ bool PCRE::Arg::parse_short_radix(const char* str, } bool PCRE::Arg::parse_ushort_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { unsigned long r; if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse if ((unsigned short)r != r) return false; // Out of range @@ -865,9 +875,9 @@ bool PCRE::Arg::parse_ushort_radix(const char* str, } bool PCRE::Arg::parse_int_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { long r; if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse if ((int)r != r) return false; // Out of range @@ -877,9 +887,9 @@ bool 
PCRE::Arg::parse_int_radix(const char* str, } bool PCRE::Arg::parse_uint_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { unsigned long r; if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse if ((unsigned int)r != r) return false; // Out of range @@ -889,26 +899,26 @@ bool PCRE::Arg::parse_uint_radix(const char* str, } bool PCRE::Arg::parse_longlong_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, str, n); char* end; errno = 0; - int64 r = strtoll(str, &end, radix); + long long r = strtoll(str, &end, radix); if (end != str + n) return false; // Leftover junk if (errno) return false; if (dest == NULL) return true; - *(reinterpret_cast<int64*>(dest)) = r; + *(reinterpret_cast<long long*>(dest)) = r; return true; } bool PCRE::Arg::parse_ulonglong_radix(const char* str, - int n, - void* dest, - int radix) { + size_t n, + void* dest, + int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, str, n); @@ -919,24 +929,30 @@ bool PCRE::Arg::parse_ulonglong_radix(const char* str, } char* end; errno = 0; - uint64 r = strtoull(str, &end, radix); + unsigned long long r = strtoull(str, &end, radix); if (end != str + n) return false; // Leftover junk if (errno) return false; if (dest == NULL) return true; - *(reinterpret_cast<uint64*>(dest)) = r; + *(reinterpret_cast<unsigned long long*>(dest)) = r; return true; } -bool PCRE::Arg::parse_double(const char* str, int n, void* dest) { +static bool parse_double_float(const char* str, size_t n, bool isfloat, + void* dest) { if (n == 0) return false; static const int kMaxLength = 200; char buf[kMaxLength]; if (n >= kMaxLength) return false; memcpy(buf, str, n); buf[n] = '\0'; - errno = 0; char* end; - double r = strtod(buf, &end); + errno = 0; + double r; + if (isfloat) { + r = 
strtof(buf, &end); + } else { + r = strtod(buf, &end); + } if (end != buf + n) { #ifdef _WIN32 // Microsoft's strtod() doesn't handle inf and nan, so we have to @@ -950,11 +966,11 @@ bool PCRE::Arg::parse_double(const char* str, int n, void* dest) { } else if ('+' == *i) { ++i; } - if (0 == stricmp(i, "inf") || 0 == stricmp(i, "infinity")) { + if (0 == _stricmp(i, "inf") || 0 == _stricmp(i, "infinity")) { r = std::numeric_limits<double>::infinity(); if (!pos) r = -r; - } else if (0 == stricmp(i, "nan")) { + } else if (0 == _stricmp(i, "nan")) { r = std::numeric_limits<double>::quiet_NaN(); } else { return false; @@ -965,29 +981,35 @@ bool PCRE::Arg::parse_double(const char* str, int n, void* dest) { } if (errno) return false; if (dest == NULL) return true; - *(reinterpret_cast<double*>(dest)) = r; + if (isfloat) { + *(reinterpret_cast<float*>(dest)) = (float)r; + } else { + *(reinterpret_cast<double*>(dest)) = r; + } return true; } -bool PCRE::Arg::parse_float(const char* str, int n, void* dest) { - double r; - if (!parse_double(str, n, &r)) return false; - if (dest == NULL) return true; - *(reinterpret_cast<float*>(dest)) = static_cast<float>(r); - return true; +bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) { + return parse_double_float(str, n, false, dest); +} + +bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) { + return parse_double_float(str, n, true, dest); } #define DEFINE_INTEGER_PARSER(name) \ - bool PCRE::Arg::parse_##name(const char* str, int n, void* dest) { \ + bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \ return parse_##name##_radix(str, n, dest, 10); \ } \ - bool PCRE::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ + bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \ return parse_##name##_radix(str, n, dest, 16); \ } \ - bool PCRE::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ + bool PCRE::Arg::parse_##name##_octal(const 
char* str, size_t n, \ + void* dest) { \ return parse_##name##_radix(str, n, dest, 8); \ } \ - bool PCRE::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ + bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \ + void* dest) { \ return parse_##name##_radix(str, n, dest, 0); \ } diff --git a/util/pcre.h b/util/pcre.h index 9ccdf35..7c6403d 100644 --- a/util/pcre.h +++ b/util/pcre.h @@ -248,7 +248,7 @@ class PCRE { // type, or one of: // string (matched piece is copied to string) // StringPiece (StringPiece is mutated to point to matched piece) - // T (where "bool T::ParseFrom(const char*, int)" exists) + // T (where "bool T::ParseFrom(const char*, size_t)" exists) // (void*)NULL (the corresponding matched sub-pattern is not copied) // // Returns true iff all of the following conditions are satisfied: @@ -442,7 +442,7 @@ class PCRE { // "*consumed" if successful. bool DoMatch(const StringPiece& text, Anchor anchor, - int* consumed, + size_t* consumed, const Arg* const* args, int n) const; // Return the number of capturing subpatterns, or -1 if the @@ -465,7 +465,7 @@ class PCRE { // When matching PCRE("(foo)|hello") against "hello", it will return 1. // But the values for all subpattern are filled in into "vec". int TryMatch(const StringPiece& text, - int startpos, + size_t startpos, Anchor anchor, bool empty_ok, int *vec, @@ -482,7 +482,7 @@ class PCRE { // internal implementation for DoMatch bool DoMatchImpl(const StringPiece& text, Anchor anchor, - int* consumed, + size_t* consumed, const Arg* const args[], int n, int* vec, @@ -499,8 +499,10 @@ class PCRE { bool report_errors_; // Silences error logging if false int match_limit_; // Limit on execution resources int stack_limit_; // Limit on stack resources (bytes) - mutable int32_t hit_limit_; // Hit limit during execution (bool)? - DISALLOW_COPY_AND_ASSIGN(PCRE); + mutable int32_t hit_limit_; // Hit limit during execution (bool)? 
+ + PCRE(const PCRE&) = delete; + PCRE& operator=(const PCRE&) = delete; }; // PCRE_Options allow you to set the PCRE::Options, plus any pcre @@ -555,7 +557,7 @@ class PCRE_Options { template <class T> class _PCRE_MatchObject { public: - static inline bool Parse(const char* str, int n, void* dest) { + static inline bool Parse(const char* str, size_t n, void* dest) { if (dest == NULL) return true; T* object = reinterpret_cast<T*>(dest); return object->ParseFrom(str, n); @@ -570,7 +572,7 @@ class PCRE::Arg { // Constructor specially designed for NULL arguments Arg(void*); - typedef bool (*Parser)(const char* str, int n, void* dest); + typedef bool (*Parser)(const char* str, size_t n, void* dest); // Type-specific parsers #define MAKE_PARSER(type, name) \ @@ -604,31 +606,31 @@ class PCRE::Arg { } // Parse the data - bool Parse(const char* str, int n) const; + bool Parse(const char* str, size_t n) const; private: void* arg_; Parser parser_; - static bool parse_null (const char* str, int n, void* dest); - static bool parse_char (const char* str, int n, void* dest); - static bool parse_schar (const char* str, int n, void* dest); - static bool parse_uchar (const char* str, int n, void* dest); - static bool parse_float (const char* str, int n, void* dest); - static bool parse_double (const char* str, int n, void* dest); - static bool parse_string (const char* str, int n, void* dest); - static bool parse_stringpiece (const char* str, int n, void* dest); - -#define DECLARE_INTEGER_PARSER(name) \ - private: \ - static bool parse_##name(const char* str, int n, void* dest); \ - static bool parse_##name##_radix(const char* str, int n, void* dest, \ - int radix); \ - \ - public: \ - static bool parse_##name##_hex(const char* str, int n, void* dest); \ - static bool parse_##name##_octal(const char* str, int n, void* dest); \ - static bool parse_##name##_cradix(const char* str, int n, void* dest) + static bool parse_null (const char* str, size_t n, void* dest); + static bool 
parse_char (const char* str, size_t n, void* dest); + static bool parse_schar (const char* str, size_t n, void* dest); + static bool parse_uchar (const char* str, size_t n, void* dest); + static bool parse_float (const char* str, size_t n, void* dest); + static bool parse_double (const char* str, size_t n, void* dest); + static bool parse_string (const char* str, size_t n, void* dest); + static bool parse_stringpiece (const char* str, size_t n, void* dest); + +#define DECLARE_INTEGER_PARSER(name) \ + private: \ + static bool parse_##name(const char* str, size_t n, void* dest); \ + static bool parse_##name##_radix(const char* str, size_t n, void* dest, \ + int radix); \ + \ + public: \ + static bool parse_##name##_hex(const char* str, size_t n, void* dest); \ + static bool parse_##name##_octal(const char* str, size_t n, void* dest); \ + static bool parse_##name##_cradix(const char* str, size_t n, void* dest) DECLARE_INTEGER_PARSER(short); DECLARE_INTEGER_PARSER(ushort); @@ -646,7 +648,7 @@ class PCRE::Arg { inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { } inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } -inline bool PCRE::Arg::Parse(const char* str, int n) const { +inline bool PCRE::Arg::Parse(const char* str, size_t n) const { return (*parser_)(str, n, arg_); } diff --git a/util/random.cc b/util/random.cc deleted file mode 100644 index 49d6195..0000000 --- a/util/random.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2005-2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Modified from Google perftools's tcmalloc_unittest.cc. 
- -#include "util/random.h" - -namespace re2 { - -int32 ACMRandom::Next() { - const int32 M = 2147483647L; // 2^31-1 - const int32 A = 16807; - // In effect, we are computing seed_ = (seed_ * A) % M, where M = 2^31-1 - uint32 lo = A * (int32)(seed_ & 0xFFFF); - uint32 hi = A * (int32)((uint32)seed_ >> 16); - lo += (hi & 0x7FFF) << 16; - if (lo > M) { - lo &= M; - ++lo; - } - lo += hi >> 15; - if (lo > M) { - lo &= M; - ++lo; - } - return (seed_ = (int32) lo); -} - -int32 ACMRandom::Uniform(int32 n) { - return Next() % n; -} - -} // namespace re2 diff --git a/util/random.h b/util/random.h deleted file mode 100644 index 6c67b2c..0000000 --- a/util/random.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2005-2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_RANDOM_H_ -#define UTIL_RANDOM_H_ - -// Modified from Google perftools's tcmalloc_unittest.cc. - -#include "util/util.h" - -namespace re2 { - -// ACM minimal standard random number generator. (re-entrant.) -class ACMRandom { - public: - ACMRandom(int32 seed) : seed_(seed) {} - int32 Next(); - int32 Uniform(int32); - - void Reset(int32 seed) { seed_ = seed; } - - private: - int32 seed_; -}; - -} // namespace re2 - -#endif // UTIL_RANDOM_H_ diff --git a/util/rune.cc b/util/rune.cc index e6231ce..4f625ea 100644 --- a/util/rune.cc +++ b/util/rune.cc @@ -11,8 +11,10 @@ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ + #include <stdarg.h> #include <string.h> + #include "util/utf.h" namespace re2 { diff --git a/util/sparse_array.h b/util/sparse_array.h index d37a10a..3b651cd 100644 --- a/util/sparse_array.h +++ b/util/sparse_array.h @@ -6,49 +6,49 @@ #define UTIL_SPARSE_ARRAY_H_ // DESCRIPTION -// +// // SparseArray<T>(m) is a map from integers in [0, m) to T values. 
// It requires (sizeof(T)+sizeof(int))*m memory, but it provides // fast iteration through the elements in the array and fast clearing // of the array. The array has a concept of certain elements being // uninitialized (having no value). -// +// // Insertion and deletion are constant time operations. -// -// Allocating the array is a constant time operation +// +// Allocating the array is a constant time operation // when memory allocation is a constant time operation. -// +// // Clearing the array is a constant time operation (unusual!). -// +// // Iterating through the array is an O(n) operation, where n // is the number of items in the array (not O(m)). // -// The array iterator visits entries in the order they were first +// The array iterator visits entries in the order they were first // inserted into the array. It is safe to add items to the array while // using an iterator: the iterator will visit indices added to the array // during the iteration, but will not re-visit indices whose values // change after visiting. Thus SparseArray can be a convenient // implementation of a work queue. -// +// // The SparseArray implementation is NOT thread-safe. It is up to the // caller to make sure only one thread is accessing the array. (Typically // these arrays are temporary values and used in situations where speed is // important.) -// +// // The SparseArray interface does not present all the usual STL bells and // whistles. -// +// // Implemented with reference to Briggs & Torczon, An Efficient // Representation for Sparse Sets, ACM Letters on Programming Languages // and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. -// +// // Briggs & Torczon popularized this technique, but it had been known // long before their paper. 
They point out that Aho, Hopcroft, and // Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's // 1986 Programming Pearls both hint at the technique in exercises to the // reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1 // exercise 8). -// +// // Briggs & Torczon describe a sparse set implementation. I have // trivially generalized it to create a sparse array (actually the original // target of the AHU and Bentley exercises). @@ -58,7 +58,7 @@ // SparseArray uses a vector dense_ and an array sparse_to_dense_, both of // size max_size_. At any point, the number of elements in the sparse array is // size_. -// +// // The vector dense_ contains the size_ elements in the sparse array (with // their indices), // in the order that the elements were first inserted. This array is dense: @@ -67,15 +67,15 @@ // The array sparse_to_dense_ maps from indices in [0,m) to indices in // [0,size_). // For indices present in the array, dense_[sparse_to_dense_[i]].index_ == i. -// For indices not present in the array, sparse_to_dense_ can contain +// For indices not present in the array, sparse_to_dense_ can contain // any value at all, perhaps outside the range [0, size_) but perhaps not. -// +// // The lax requirement on sparse_to_dense_ values makes clearing // the array very easy: set size_ to 0. Lookups are slightly more // complicated. An index i has a value in the array if and only if: // sparse_to_dense_[i] is in [0, size_) AND // dense_[sparse_to_dense_[i]].index_ == i. -// If both these properties hold, only then it is safe to refer to +// If both these properties hold, only then it is safe to refer to // dense_[sparse_to_dense_[i]].value_ // as the value associated with index i. // @@ -85,14 +85,24 @@ // Deletion of specific values from the array is implemented by // swapping dense_[size_-1] and the dense_ being deleted and then // updating the appropriate sparse_to_dense_ entries. 
-// +// // To make the sparse array as efficient as possible for non-primitive types, // elements may or may not be destroyed when they are deleted from the sparse // array through a call to erase(), erase_existing() or resize(). They // immediately become inaccessible, but they are only guaranteed to be // destroyed when the SparseArray destructor is called. +// +// A moved-from SparseArray will be empty. + +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <memory> +#include <utility> +#include <vector> #include "util/util.h" +#include "util/logging.h" namespace re2 { @@ -100,23 +110,34 @@ template<typename Value> class SparseArray { public: SparseArray(); - SparseArray(int max_size); + explicit SparseArray(int max_size); ~SparseArray(); // IndexValue pairs: exposed in SparseArray::iterator. class IndexValue; typedef IndexValue value_type; - typedef typename vector<IndexValue>::iterator iterator; - typedef typename vector<IndexValue>::const_iterator const_iterator; + typedef typename std::vector<IndexValue>::iterator iterator; + typedef typename std::vector<IndexValue>::const_iterator const_iterator; - inline const IndexValue& iv(int i) const; + SparseArray(const SparseArray& src); + SparseArray(SparseArray&& src) noexcept; + + SparseArray& operator=(const SparseArray& src); + SparseArray& operator=(SparseArray&& src) noexcept; + + const IndexValue& iv(int i) const; // Return the number of entries in the array. int size() const { return size_; } + // Indicate whether the array is empty. + int empty() const { + return size_ == 0; + } + // Iterate over the array. iterator begin() { return dense_.begin(); @@ -148,39 +169,68 @@ class SparseArray { } // Check whether index i is in the array. - inline bool has_index(int i) const; + bool has_index(int i) const; // Comparison function for sorting. 
// Can sort the sparse array so that future iterations // will visit indices in increasing order using - // sort(arr.begin(), arr.end(), arr.less); + // std::sort(arr.begin(), arr.end(), arr.less); static bool less(const IndexValue& a, const IndexValue& b); public: // Set the value at index i to v. - inline iterator set(int i, Value v); + iterator set(int i, const Value& v) { + return SetInternal(true, i, v); + } + iterator set(int i, Value&& v) { // NOLINT + return SetInternal(true, i, std::move(v)); + } - pair<iterator, bool> insert(const value_type& new_value); + std::pair<iterator, bool> insert(const value_type& v) { + return InsertInternal(v); + } + std::pair<iterator, bool> insert(value_type&& v) { // NOLINT + return InsertInternal(std::move(v)); + } - // Returns the value at index i - // or defaultv if index i is not initialized in the array. - inline Value get(int i, Value defaultv) const; + template <typename... Args> + std::pair<iterator, bool> emplace(Args&&... args) { // NOLINT + return InsertInternal(value_type(std::forward<Args>(args)...)); + } - iterator find(int i); + iterator find(int i) { + if (has_index(i)) + return dense_.begin() + sparse_to_dense_[i]; + return end(); + } - const_iterator find(int i) const; + const_iterator find(int i) const { + if (has_index(i)) + return dense_.begin() + sparse_to_dense_[i]; + return end(); + } // Change the value at index i to v. // Fast but unsafe: only use if has_index(i) is true. - inline iterator set_existing(int i, Value v); + iterator set_existing(int i, const Value& v) { + return SetExistingInternal(i, v); + } + iterator set_existing(int i, Value&& v) { // NOLINT + return SetExistingInternal(i, std::move(v)); + } // Set the value at the new index i to v. // Fast but unsafe: only use if has_index(i) is false. 
- inline iterator set_new(int i, Value v); + iterator set_new(int i, const Value& v) { + return SetInternal(false, i, v); + } + iterator set_new(int i, Value&& v) { // NOLINT + return SetInternal(false, i, std::move(v)); + } // Get the value at index i from the array.. // Fast but unsafe: only use if has_index(i) is true. - inline Value get_existing(int i) const; + const Value& get_existing(int i) const; // Erasing items from the array during iteration is in general // NOT safe. There is one special case, which is that the current @@ -201,44 +251,122 @@ class SparseArray { // the iterators could walk past the end of the array. // Erases the element at index i from the array. - inline void erase(int i); + void erase(int i); // Erases the element at index i from the array. // Fast but unsafe: only use if has_index(i) is true. - inline void erase_existing(int i); + void erase_existing(int i); private: + template <typename U> + std::pair<iterator, bool> InsertInternal(U&& v) { + DebugCheckInvariants(); + std::pair<iterator, bool> p; + if (has_index(v.index_)) { + p = {dense_.begin() + sparse_to_dense_[v.index_], false}; + } else { + p = {set_new(std::forward<U>(v).index_, std::forward<U>(v).second), true}; + } + DebugCheckInvariants(); + return p; + } + + template <typename U> + iterator SetInternal(bool allow_overwrite, int i, U&& v) { // NOLINT + DebugCheckInvariants(); + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + LOG(DFATAL) << "(jyasskin) Illegal index " << i + << " passed to SparseArray(" << max_size_ + << ").set" << (allow_overwrite?"":"_new") << "()."; + // Semantically, end() would be better here, but we already know + // the user did something stupid, so begin() insulates them from + // dereferencing an invalid pointer. 
+ return begin(); + } + if (!allow_overwrite) { + DCHECK(!has_index(i)); + create_index(i); + } else { + if (!has_index(i)) + create_index(i); + } + return set_existing(i, std::forward<U>(v)); // NOLINT + } + + template <typename U> + iterator SetExistingInternal(int i, U&& v) { // NOLINT + DebugCheckInvariants(); + DCHECK(has_index(i)); + dense_[sparse_to_dense_[i]].value() = std::forward<U>(v); + DebugCheckInvariants(); + return dense_.begin() + sparse_to_dense_[i]; + } + // Add the index i to the array. // Only use if has_index(i) is known to be false. // Since it doesn't set the value associated with i, // this function is private, only intended as a helper // for other methods. - inline void create_index(int i); + void create_index(int i); // In debug mode, verify that some invariant properties of the class // are being maintained. This is called at the end of the constructor // and at the beginning and end of all public non-const member functions. - inline void DebugCheckInvariants() const; + void DebugCheckInvariants() const; - static bool InitMemory() { -#ifdef MEMORY_SANITIZER - return true; -#else - return RunningOnValgrind(); -#endif - } + int size_ = 0; + int max_size_ = 0; + std::unique_ptr<int[]> sparse_to_dense_; + std::vector<IndexValue> dense_; +}; - int size_; - int max_size_; - int* sparse_to_dense_; - vector<IndexValue> dense_; +template<typename Value> +SparseArray<Value>::SparseArray() = default; - DISALLOW_COPY_AND_ASSIGN(SparseArray); -}; +template<typename Value> +SparseArray<Value>::SparseArray(const SparseArray& src) + : size_(src.size_), + max_size_(src.max_size_), + sparse_to_dense_(new int[max_size_]), + dense_(src.dense_) { + std::copy_n(src.sparse_to_dense_.get(), max_size_, sparse_to_dense_.get()); +} + +template<typename Value> +SparseArray<Value>::SparseArray(SparseArray&& src) noexcept // NOLINT + : size_(src.size_), + max_size_(src.max_size_), + sparse_to_dense_(std::move(src.sparse_to_dense_)), + dense_(std::move(src.dense_)) { 
+ src.size_ = 0; + src.max_size_ = 0; + src.dense_.clear(); +} template<typename Value> -SparseArray<Value>::SparseArray() - : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_() {} +SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) { + std::unique_ptr<int[]> a(new int[src.max_size_]); + std::copy_n(src.sparse_to_dense_.get(), src.max_size_, a.get()); + sparse_to_dense_ = std::move(a); + dense_ = src.dense_; + max_size_ = src.max_size_; + size_ = src.size_; + return *this; +} + +template<typename Value> +SparseArray<Value>& SparseArray<Value>::operator=( + SparseArray&& src) noexcept { // NOLINT + size_ = src.size_; + max_size_ = src.max_size_; + sparse_to_dense_ = std::move(src.sparse_to_dense_); + dense_ = std::move(src.dense_); + // clear out the source + src.size_ = 0; + src.max_size_ = 0; + src.dense_.clear(); + return *this; +} // IndexValue pairs: exposed in SparseArray::iterator. template<typename Value> @@ -249,17 +377,24 @@ class SparseArray<Value>::IndexValue { typedef Value second_type; IndexValue() {} - IndexValue(int index, const Value& value) : second(value), index_(index) {} + IndexValue(int i, const Value& v) : index_(i), second(v) {} + IndexValue(int i, Value&& v) : index_(i), second(std::move(v)) {} int index() const { return index_; } - Value value() const { return second; } - // Provide the data in the 'second' member so that the utilities - // in map-util work. - Value second; + Value& value() & { return second; } + const Value& value() const & { return second; } + Value&& value() && { return std::move(second); } // NOLINT private: int index_; + + public: + // Provide the data in the 'second' member so that the utilities + // in map-util work. + // TODO(billydonahue): 'second' is public for short-term compatibility. + // Users will be transitioned to using value() accessor. 
+ Value second; }; template<typename Value> @@ -273,30 +408,25 @@ SparseArray<Value>::iv(int i) const { // Change the maximum size of the array. // Invalidates all iterators. template<typename Value> -void SparseArray<Value>::resize(int new_max_size) { +void SparseArray<Value>::resize(int max_size) { DebugCheckInvariants(); - if (new_max_size > max_size_) { - int* a = new int[new_max_size]; + if (max_size > max_size_) { + std::unique_ptr<int[]> a(new int[max_size]); if (sparse_to_dense_) { - memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); - delete[] sparse_to_dense_; + std::copy_n(sparse_to_dense_.get(), max_size_, a.get()); } - sparse_to_dense_ = a; - - dense_.resize(new_max_size); - - // These don't need to be initialized for correctness, - // but Valgrind will warn about use of uninitialized memory, - // so initialize the new memory when compiling debug binaries. - // Initialize it to garbage to detect bugs in the future. - if (InitMemory()) { - for (int i = max_size_; i < new_max_size; i++) { - sparse_to_dense_[i] = 0xababababU; - dense_[i].index_ = 0xababababU; - } + sparse_to_dense_ = std::move(a); + + dense_.resize(max_size); + +#ifdef MEMORY_SANITIZER + for (int i = max_size_; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i].index_ = 0xababababU; } +#endif } - max_size_ = new_max_size; + max_size_ = max_size; if (size_ > max_size_) size_ = max_size_; DebugCheckInvariants(); @@ -307,93 +437,16 @@ template<typename Value> bool SparseArray<Value>::has_index(int i) const { DCHECK_GE(i, 0); DCHECK_LT(i, max_size_); - if (static_cast<uint>(i) >= static_cast<uint>(max_size_)) { + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { return false; } // Unsigned comparison avoids checking sparse_to_dense_[i] < 0. 
- return (uint)sparse_to_dense_[i] < (uint)size_ && - dense_[sparse_to_dense_[i]].index_ == i; + return (uint32_t)sparse_to_dense_[i] < (uint32_t)size_ && + dense_[sparse_to_dense_[i]].index_ == i; } -// Set the value at index i to v. template<typename Value> -typename SparseArray<Value>::iterator SparseArray<Value>::set(int i, Value v) { - DebugCheckInvariants(); - if (static_cast<uint>(i) >= static_cast<uint>(max_size_)) { - // Semantically, end() would be better here, but we already know - // the user did something stupid, so begin() insulates them from - // dereferencing an invalid pointer. - return begin(); - } - if (!has_index(i)) - create_index(i); - return set_existing(i, v); -} - -template<typename Value> -pair<typename SparseArray<Value>::iterator, bool> SparseArray<Value>::insert( - const value_type& new_value) { - DebugCheckInvariants(); - pair<typename SparseArray<Value>::iterator, bool> p; - if (has_index(new_value.index_)) { - p = make_pair(dense_.begin() + sparse_to_dense_[new_value.index_], false); - } else { - p = make_pair(set_new(new_value.index_, new_value.second), true); - } - DebugCheckInvariants(); - return p; -} - -template<typename Value> -Value SparseArray<Value>::get(int i, Value defaultv) const { - if (!has_index(i)) - return defaultv; - return get_existing(i); -} - -template<typename Value> -typename SparseArray<Value>::iterator SparseArray<Value>::find(int i) { - if (has_index(i)) - return dense_.begin() + sparse_to_dense_[i]; - return end(); -} - -template<typename Value> -typename SparseArray<Value>::const_iterator -SparseArray<Value>::find(int i) const { - if (has_index(i)) { - return dense_.begin() + sparse_to_dense_[i]; - } - return end(); -} - -template<typename Value> -typename SparseArray<Value>::iterator -SparseArray<Value>::set_existing(int i, Value v) { - DebugCheckInvariants(); - DCHECK(has_index(i)); - dense_[sparse_to_dense_[i]].second = v; - DebugCheckInvariants(); - return dense_.begin() + sparse_to_dense_[i]; -} - 
-template<typename Value> -typename SparseArray<Value>::iterator -SparseArray<Value>::set_new(int i, Value v) { - DebugCheckInvariants(); - if (static_cast<uint>(i) >= static_cast<uint>(max_size_)) { - // Semantically, end() would be better here, but we already know - // the user did something stupid, so begin() insulates them from - // dereferencing an invalid pointer. - return begin(); - } - DCHECK(!has_index(i)); - create_index(i); - return set_existing(i, v); -} - -template<typename Value> -Value SparseArray<Value>::get_existing(int i) const { +const Value& SparseArray<Value>::get_existing(int i) const { DCHECK(has_index(i)); return dense_[sparse_to_dense_[i]].second; } @@ -412,7 +465,7 @@ void SparseArray<Value>::erase_existing(int i) { DCHECK(has_index(i)); int di = sparse_to_dense_[i]; if (di < size_ - 1) { - dense_[di] = dense_[size_ - 1]; + dense_[di] = std::move(dense_[size_ - 1]); sparse_to_dense_[dense_[di].index_] = di; } size_--; @@ -430,22 +483,22 @@ void SparseArray<Value>::create_index(int i) { template<typename Value> SparseArray<Value>::SparseArray(int max_size) { max_size_ = max_size; - sparse_to_dense_ = new int[max_size]; + sparse_to_dense_ = std::unique_ptr<int[]>(new int[max_size]); dense_.resize(max_size); - // Don't need to zero the new memory, but appease Valgrind. 
- if (InitMemory()) { - for (int i = 0; i < max_size; i++) { - sparse_to_dense_[i] = 0xababababU; - dense_[i].index_ = 0xababababU; - } - } size_ = 0; + +#ifdef MEMORY_SANITIZER + for (int i = 0; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i].index_ = 0xababababU; + } +#endif + DebugCheckInvariants(); } template<typename Value> SparseArray<Value>::~SparseArray() { DebugCheckInvariants(); - delete[] sparse_to_dense_; } template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const { diff --git a/util/sparse_set.h b/util/sparse_set.h index 537a094..c52ab74 100644 --- a/util/sparse_set.h +++ b/util/sparse_set.h @@ -6,179 +6,254 @@ #define UTIL_SPARSE_SET_H_ // DESCRIPTION -// -// SparseSet<T>(m) is a set of integers in [0, m). +// +// SparseSet(m) is a set of integers in [0, m). // It requires sizeof(int)*m memory, but it provides // fast iteration through the elements in the set and fast clearing // of the set. -// +// // Insertion and deletion are constant time operations. -// -// Allocating the set is a constant time operation +// +// Allocating the set is a constant time operation // when memory allocation is a constant time operation. -// +// // Clearing the set is a constant time operation (unusual!). -// +// // Iterating through the set is an O(n) operation, where n // is the number of items in the set (not O(m)). // -// The set iterator visits entries in the order they were first -// inserted into the array. It is safe to add items to the set while +// The set iterator visits entries in the order they were first +// inserted into the set. It is safe to add items to the set while // using an iterator: the iterator will visit indices added to the set // during the iteration, but will not re-visit indices whose values // change after visiting. Thus SparseSet can be a convenient // implementation of a work queue. -// +// // The SparseSet implementation is NOT thread-safe. 
It is up to the // caller to make sure only one thread is accessing the set. (Typically // these sets are temporary values and used in situations where speed is // important.) -// +// // The SparseSet interface does not present all the usual STL bells and // whistles. -// +// // Implemented with reference to Briggs & Torczon, An Efficient // Representation for Sparse Sets, ACM Letters on Programming Languages // and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. -// -// For a generalization to sparse array, see sparse_array.h. +// +// This is a specialization of sparse array; see sparse_array.h. // IMPLEMENTATION // -// See sparse_array.h for implementation details +// See sparse_array.h for implementation details. + +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <memory> +#include <utility> +#include <vector> #include "util/util.h" +#include "util/logging.h" namespace re2 { -class SparseSet { +template<typename Value> +class SparseSetT { public: - SparseSet() - : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL) {} - - SparseSet(int max_size) { - max_size_ = max_size; - sparse_to_dense_ = new int[max_size]; - dense_ = new int[max_size]; - // Don't need to zero the memory, but do so anyway - // to appease Valgrind. - if (InitMemory()) { - for (int i = 0; i < max_size; i++) { - dense_[i] = 0xababababU; - sparse_to_dense_[i] = 0xababababU; - } - } - size_ = 0; + SparseSetT(); + explicit SparseSetT(int max_size); + ~SparseSetT(); + + typedef typename std::vector<int>::iterator iterator; + typedef typename std::vector<int>::const_iterator const_iterator; + + // Return the number of entries in the set. + int size() const { + return size_; } - ~SparseSet() { - delete[] sparse_to_dense_; - delete[] dense_; + // Indicate whether the set is empty. + int empty() const { + return size_ == 0; } - typedef int* iterator; - typedef const int* const_iterator; + // Iterate over the set. 
+ iterator begin() { + return dense_.begin(); + } + iterator end() { + return dense_.begin() + size_; + } - int size() const { return size_; } - iterator begin() { return dense_; } - iterator end() { return dense_ + size_; } - const_iterator begin() const { return dense_; } - const_iterator end() const { return dense_ + size_; } + const_iterator begin() const { + return dense_.begin(); + } + const_iterator end() const { + return dense_.begin() + size_; + } - // Change the maximum size of the array. + // Change the maximum size of the set. // Invalidates all iterators. - void resize(int new_max_size) { - if (size_ > new_max_size) - size_ = new_max_size; - if (new_max_size > max_size_) { - int* a = new int[new_max_size]; - if (sparse_to_dense_) { - memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); - if (InitMemory()) { - for (int i = max_size_; i < new_max_size; i++) - a[i] = 0xababababU; - } - delete[] sparse_to_dense_; - } - sparse_to_dense_ = a; - - a = new int[new_max_size]; - if (dense_) { - memmove(a, dense_, size_*sizeof a[0]); - if (InitMemory()) { - for (int i = size_; i < new_max_size; i++) - a[i] = 0xababababU; - } - delete[] dense_; - } - dense_ = a; - } - max_size_ = new_max_size; - } + void resize(int max_size); - // Return the maximum size of the array. + // Return the maximum size of the set. // Indices can be in the range [0, max_size). - int max_size() const { return max_size_; } - - // Clear the array. - void clear() { size_ = 0; } + int max_size() const { + return max_size_; + } - // Check whether i is in the array. - bool contains(int i) const { - DCHECK_GE(i, 0); - DCHECK_LT(i, max_size_); - if (static_cast<uint>(i) >= static_cast<uint>(max_size_)) { - return false; - } - // Unsigned comparison avoids checking sparse_to_dense_[i] < 0. - return (uint)sparse_to_dense_[i] < (uint)size_ && - dense_[sparse_to_dense_[i]] == i; + // Clear the set. + void clear() { + size_ = 0; } - // Adds i to the set. 
- void insert(int i) { - if (!contains(i)) - insert_new(i); + // Check whether index i is in the set. + bool contains(int i) const; + + // Comparison function for sorting. + // Can sort the sparse set so that future iterations + // will visit indices in increasing order using + // std::sort(arr.begin(), arr.end(), arr.less); + static bool less(int a, int b); + + public: + // Insert index i into the set. + iterator insert(int i) { + return InsertInternal(true, i); } - // Set the value at the new index i to v. + // Insert index i into the set. // Fast but unsafe: only use if contains(i) is false. - void insert_new(int i) { - if (static_cast<uint>(i) >= static_cast<uint>(max_size_)) { + iterator insert_new(int i) { + return InsertInternal(false, i); + } + + private: + iterator InsertInternal(bool allow_existing, int i) { + DebugCheckInvariants(); + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + LOG(DFATAL) << "(jyasskin) Illegal index " << i + << " passed to SparseSet(" << max_size_ + << ").insert" << (allow_existing?"":"_new") << "()."; // Semantically, end() would be better here, but we already know // the user did something stupid, so begin() insulates them from // dereferencing an invalid pointer. - return; + return begin(); } - DCHECK(!contains(i)); - DCHECK_LT(size_, max_size_); - sparse_to_dense_[i] = size_; - dense_[size_] = i; - size_++; + if (!allow_existing) { + DCHECK(!contains(i)); + create_index(i); + } else { + if (!contains(i)) + create_index(i); + } + DebugCheckInvariants(); + return dense_.begin() + sparse_to_dense_[i]; } - // Comparison function for sorting. - // Can sort the sparse array so that future iterations - // will visit indices in increasing order using - // sort(arr.begin(), arr.end(), arr.less); - static bool less(int a, int b) { return a < b; } + // Add the index i to the set. + // Only use if contains(i) is known to be false. + // This function is private, only intended as a helper + // for other methods. 
+ void create_index(int i); + + // In debug mode, verify that some invariant properties of the class + // are being maintained. This is called at the end of the constructor + // and at the beginning and end of all public non-const member functions. + void DebugCheckInvariants() const; + + int size_ = 0; + int max_size_ = 0; + std::unique_ptr<int[]> sparse_to_dense_; + std::vector<int> dense_; +}; + +template<typename Value> +SparseSetT<Value>::SparseSetT() = default; + +// Change the maximum size of the set. +// Invalidates all iterators. +template<typename Value> +void SparseSetT<Value>::resize(int max_size) { + DebugCheckInvariants(); + if (max_size > max_size_) { + std::unique_ptr<int[]> a(new int[max_size]); + if (sparse_to_dense_) { + std::copy_n(sparse_to_dense_.get(), max_size_, a.get()); + } + sparse_to_dense_ = std::move(a); + + dense_.resize(max_size); - private: - static bool InitMemory() { #ifdef MEMORY_SANITIZER - return true; -#else - return RunningOnValgrind(); + for (int i = max_size_; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i] = 0xababababU; + } #endif } + max_size_ = max_size; + if (size_ > max_size_) + size_ = max_size_; + DebugCheckInvariants(); +} - int size_; - int max_size_; - int* sparse_to_dense_; - int* dense_; +// Check whether index i is in the set. +template<typename Value> +bool SparseSetT<Value>::contains(int i) const { + DCHECK_GE(i, 0); + DCHECK_LT(i, max_size_); + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + return false; + } + // Unsigned comparison avoids checking sparse_to_dense_[i] < 0. 
+ return (uint32_t)sparse_to_dense_[i] < (uint32_t)size_ && + dense_[sparse_to_dense_[i]] == i; +} - DISALLOW_COPY_AND_ASSIGN(SparseSet); -}; +template<typename Value> +void SparseSetT<Value>::create_index(int i) { + DCHECK(!contains(i)); + DCHECK_LT(size_, max_size_); + sparse_to_dense_[i] = size_; + dense_[size_] = i; + size_++; +} + +template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) { + max_size_ = max_size; + sparse_to_dense_ = std::unique_ptr<int[]>(new int[max_size]); + dense_.resize(max_size); + size_ = 0; + +#ifdef MEMORY_SANITIZER + for (int i = 0; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i] = 0xababababU; + } +#endif + + DebugCheckInvariants(); +} + +template<typename Value> SparseSetT<Value>::~SparseSetT() { + DebugCheckInvariants(); +} + +template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const { + DCHECK_LE(0, size_); + DCHECK_LE(size_, max_size_); + DCHECK(size_ == 0 || sparse_to_dense_ != NULL); +} + +// Comparison function for sorting. +template<typename Value> bool SparseSetT<Value>::less(int a, int b) { + return a < b; +} + +typedef SparseSetT<void> SparseSet; } // namespace re2 diff --git a/util/stringprintf.cc b/util/stringprintf.cc deleted file mode 100644 index e71d993..0000000 --- a/util/stringprintf.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2002 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "util/util.h" - -namespace re2 { - -static void StringAppendV(string* dst, const char* format, va_list ap) { - // First try with a small fixed size buffer - char space[1024]; - - // It's possible for methods that use a va_list to invalidate - // the data in it upon use. The fix is to make a copy - // of the structure before using it and use that copy instead. 
- va_list backup_ap; - va_copy(backup_ap, ap); - int result = vsnprintf(space, sizeof(space), format, backup_ap); - va_end(backup_ap); - - if ((result >= 0) && (static_cast<unsigned long>(result) < sizeof(space))) { - // It fit - dst->append(space, result); - return; - } - - // Repeatedly increase buffer size until it fits - int length = sizeof(space); - while (true) { - if (result < 0) { - // Older behavior: just try doubling the buffer size - length *= 2; - } else { - // We need exactly "result+1" characters - length = result+1; - } - char* buf = new char[length]; - - // Restore the va_list before we use it again - va_copy(backup_ap, ap); -#if !defined(_WIN32) - result = vsnprintf(buf, length, format, backup_ap); -#else - // On Windows, the function takes five arguments, not four. With an array, - // the buffer size will be inferred, but not with a pointer. C'est la vie. - // (See https://github.com/google/re2/issues/40 for more details.) - result = vsnprintf(buf, length, _TRUNCATE, format, backup_ap); -#endif - va_end(backup_ap); - - if ((result >= 0) && (result < length)) { - // It fit - dst->append(buf, result); - delete[] buf; - return; - } - delete[] buf; - } -} - -string StringPrintf(const char* format, ...) { - va_list ap; - va_start(ap, format); - string result; - StringAppendV(&result, format, ap); - va_end(ap); - return result; -} - -void SStringPrintf(string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - dst->clear(); - StringAppendV(dst, format, ap); - va_end(ap); -} - -void StringAppendF(string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - StringAppendV(dst, format, ap); - va_end(ap); -} - -} // namespace re2 diff --git a/util/strutil.cc b/util/strutil.cc index d3a0249..7181073 100644 --- a/util/strutil.cc +++ b/util/strutil.cc @@ -2,8 +2,15 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-#include "util/util.h" -#include "re2/stringpiece.h" +#include <stdarg.h> +#include <stdio.h> + +#include "util/strutil.h" + +#ifdef _WIN32 +#define snprintf _snprintf +#define vsnprintf _vsnprintf +#endif namespace re2 { @@ -12,16 +19,16 @@ namespace re2 { // Copies 'src' to 'dest', escaping dangerous characters using // C-style escape sequences. 'src' and 'dest' should not overlap. // Returns the number of bytes written to 'dest' (not including the \0) -// or -1 if there was insufficient space. +// or (size_t)-1 if there was insufficient space. // ---------------------------------------------------------------------- -int CEscapeString(const char* src, int src_len, char* dest, - int dest_len) { +static size_t CEscapeString(const char* src, size_t src_len, + char* dest, size_t dest_len) { const char* src_end = src + src_len; - int used = 0; + size_t used = 0; for (; src < src_end; src++) { if (dest_len - used < 2) // space for two-character escape - return -1; + return (size_t)-1; unsigned char c = *src; switch (c) { @@ -37,14 +44,8 @@ int CEscapeString(const char* src, int src_len, char* dest, // interpreted as part of the character code by C. if (c < ' ' || c > '~') { if (dest_len - used < 5) // space for four-character escape + \0 - return -1; -#if !defined(_WIN32) + return (size_t)-1; snprintf(dest + used, 5, "\\%03o", c); -#else - // On Windows, the function takes 4+VA arguments, not 3+VA. With an - // array, the buffer size will be inferred, but not with a pointer. 
- snprintf(dest + used, 5, _TRUNCATE, "\\%03o", c); -#endif used += 4; } else { dest[used++] = c; break; @@ -53,24 +54,23 @@ int CEscapeString(const char* src, int src_len, char* dest, } if (dest_len - used < 1) // make sure that there is room for \0 - return -1; + return (size_t)-1; dest[used] = '\0'; // doesn't count towards return value though return used; } - // ---------------------------------------------------------------------- // CEscape() // Copies 'src' to result, escaping dangerous characters using // C-style escape sequences. 'src' and 'dest' should not overlap. // ---------------------------------------------------------------------- string CEscape(const StringPiece& src) { - const int dest_length = src.size() * 4 + 1; // Maximum possible expansion - char* dest = new char[dest_length]; - const int len = CEscapeString(src.data(), src.size(), - dest, dest_length); - string s = string(dest, len); + const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion + char* dest = new char[dest_len]; + const size_t used = CEscapeString(src.data(), src.size(), + dest, dest_len); + string s = string(dest, used); delete[] dest; return s; } @@ -100,4 +100,73 @@ string PrefixSuccessor(const StringPiece& prefix) { } } +static void StringAppendV(string* dst, const char* format, va_list ap) { + // First try with a small fixed size buffer + char space[1024]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. 
+ va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, sizeof(space), format, backup_ap); + va_end(backup_ap); + + if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) { + // It fit + dst->append(space, result); + return; + } + + // Repeatedly increase buffer size until it fits + int length = sizeof(space); + while (true) { + if (result < 0) { + // Older behavior: just try doubling the buffer size + length *= 2; + } else { + // We need exactly "result+1" characters + length = result+1; + } + char* buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + result = vsnprintf(buf, length, format, backup_ap); + va_end(backup_ap); + + if ((result >= 0) && (result < length)) { + // It fit + dst->append(buf, result); + delete[] buf; + return; + } + delete[] buf; + } +} + +string StringPrintf(const char* format, ...) { + va_list ap; + va_start(ap, format); + string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +void SStringPrintf(string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); +} + +void StringAppendF(string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + } // namespace re2 diff --git a/util/strutil.h b/util/strutil.h new file mode 100644 index 0000000..71dd293 --- /dev/null +++ b/util/strutil.h @@ -0,0 +1,23 @@ +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef UTIL_STRUTIL_H_ +#define UTIL_STRUTIL_H_ + +#include <string> + +#include "re2/stringpiece.h" +#include "util/util.h" + +namespace re2 { + +string CEscape(const StringPiece& src); +string PrefixSuccessor(const StringPiece& prefix); +string StringPrintf(const char* format, ...); +void SStringPrintf(string* dst, const char* format, ...); +void StringAppendF(string* dst, const char* format, ...); + +} // namespace re2 + +#endif // UTIL_STRUTIL_H_ diff --git a/util/test.cc b/util/test.cc index 0a751fe..fb31ed8 100644 --- a/util/test.cc +++ b/util/test.cc @@ -6,6 +6,7 @@ #ifndef _WIN32 #include <sys/resource.h> #endif + #include "util/test.h" DEFINE_string(test_tmpdir, "/var/tmp", "temp directory"); diff --git a/util/test.h b/util/test.h index 4bdd343..e075c1e 100644 --- a/util/test.h +++ b/util/test.h @@ -7,6 +7,7 @@ #include "util/util.h" #include "util/flags.h" +#include "util/logging.h" #define TEST(x, y) \ void x##y(void); \ diff --git a/util/thread.cc b/util/thread.cc deleted file mode 100644 index d97f14b..0000000 --- a/util/thread.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -#include "util/thread.h" -#include "util/util.h" - -Thread::Thread() { - pid_ = 0; - running_ = 0; - joinable_ = 0; -} - -Thread::~Thread() { -} - -void *startThread(void *v) { - Thread* t = (Thread*)v; - t->Run(); - return 0; -} - -void Thread::Start() { - CHECK(!running_); - pthread_create(&pid_, 0, startThread, this); - running_ = true; - if (!joinable_) - pthread_detach(pid_); -} - -void Thread::Join() { - CHECK(running_); - CHECK(joinable_); - void *val; - pthread_join(pid_, &val); - running_ = 0; -} - -void Thread::SetJoinable(bool j) { - CHECK(!running_); - joinable_ = j; -} diff --git a/util/thread.h b/util/thread.h deleted file mode 100644 index f9ecaf6..0000000 --- a/util/thread.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_THREAD_H_ -#define UTIL_THREAD_H_ - -#ifdef _WIN32 -#include <windows.h> -#else -#include <pthread.h> -#endif - -class Thread { - public: - Thread(); - virtual ~Thread(); - void Start(); - void Join(); - void SetJoinable(bool); - virtual void Run() = 0; - - private: -#ifdef _WIN32 - HANDLE pid_; -#else - pthread_t pid_; -#endif - bool running_; - bool joinable_; -}; - -#endif // UTIL_THREAD_H_ diff --git a/util/threadwin.cc b/util/threadwin.cc deleted file mode 100644 index d68f2c5..0000000 --- a/util/threadwin.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -#include "util/thread.h" -#include "util/util.h" - -Thread::Thread() { - pid_ = 0; - running_ = 0; - joinable_ = 0; -} - -Thread::~Thread() { -} - -DWORD WINAPI startThread(void *v) { - Thread* t = (Thread*)v; - t->Run(); - return 0; -} - -void Thread::Start() { - CHECK(!running_); - pid_ = CreateThread(NULL, 0, startThread, this, 0, NULL); - running_ = true; - if (!joinable_) { - CloseHandle(pid_); - pid_ = 0; - } -} - -void Thread::Join() { - CHECK(running_); - CHECK(joinable_); - if (pid_ != 0) - WaitForSingleObject(pid_, INFINITE); - running_ = 0; -} - -void Thread::SetJoinable(bool j) { - CHECK(!running_); - joinable_ = j; -} diff --git a/util/util.h b/util/util.h index 27c075f..a69d842 100644 --- a/util/util.h +++ b/util/util.h @@ -5,100 +5,9 @@ #ifndef UTIL_UTIL_H_ #define UTIL_UTIL_H_ -// C -#include <stdio.h> -#include <string.h> -#include <stdint.h> -#include <stddef.h> // For size_t -#include <assert.h> -#include <stdarg.h> -#include <time.h> // For clock_gettime, CLOCK_REALTIME -#include <ctype.h> // For isdigit, isalpha - -#if !defined(_WIN32) -#include <sys/time.h> // For gettimeofday -#endif - -// C++ -#include <ctime> -#include <vector> +// TODO(junyer): Get rid of this. #include <string> -#include <algorithm> -#include <iosfwd> -#include <map> -#include <stack> -#include <ostream> -#include <utility> -#include <set> -#include <atomic> -#include <mutex> // For std::call_once -#include <unordered_set> -#include <initializer_list> - -// Use std names. 
-using std::set; -using std::pair; -using std::vector; using std::string; -using std::min; -using std::max; -using std::ostream; -using std::map; -using std::stack; -using std::sort; -using std::swap; -using std::make_pair; -using std::unordered_set; - -#ifdef _WIN32 - -#define snprintf _snprintf_s -#define stricmp _stricmp -#define strtof strtod /* not really correct but best we can do */ -#define strtoll _strtoi64 -#define strtoull _strtoui64 -#define vsnprintf vsnprintf_s - -#pragma warning(disable: 4200) // zero-sized array - -#endif - -namespace re2 { - -typedef int8_t int8; -typedef uint8_t uint8; -typedef int16_t int16; -typedef uint16_t uint16; -typedef int32_t int32; -typedef uint32_t uint32; -typedef int64_t int64; -typedef uint64_t uint64; - -typedef unsigned int uint; - -// Prevent the compiler from complaining about or optimizing away variables -// that appear unused. -#undef ATTRIBUTE_UNUSED -#if defined(__GNUC__) -#define ATTRIBUTE_UNUSED __attribute__ ((unused)) -#else -#define ATTRIBUTE_UNUSED -#endif - -// COMPILE_ASSERT causes a compile error about msg if expr is not true. -#if __cplusplus >= 201103L -#define COMPILE_ASSERT(expr, msg) static_assert(expr, #msg) -#else -template<bool> struct CompileAssert {}; -#define COMPILE_ASSERT(expr, msg) \ - typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED -#endif - -// DISALLOW_COPY_AND_ASSIGN disallows the copy and operator= functions. -// It goes in the private: declarations in a class. 
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) #define arraysize(array) (int)(sizeof(array)/sizeof((array)[0])) @@ -110,37 +19,4 @@ template<bool> struct CompileAssert {}; #define NO_THREAD_SAFETY_ANALYSIS #endif -class StringPiece; - -string CEscape(const StringPiece& src); -int CEscapeString(const char* src, int src_len, char* dest, int dest_len); - -extern string StringPrintf(const char* format, ...); -extern void SStringPrintf(string* dst, const char* format, ...); -extern void StringAppendF(string* dst, const char* format, ...); -extern string PrefixSuccessor(const StringPiece& prefix); - -uint32 hashword(const uint32*, size_t, uint32); -void hashword2(const uint32*, size_t, uint32*, uint32*); - -static inline uint32 Hash32StringWithSeed(const char* s, int len, uint32 seed) { - return hashword((uint32*)s, len/4, seed); -} - -static inline uint64 Hash64StringWithSeed(const char* s, int len, uint32 seed) { - uint32 x, y; - x = seed; - y = 0; - hashword2((uint32*)s, len/4, &x, &y); - return ((uint64)x << 32) | y; -} - -bool RunningOnValgrind(); - -} // namespace re2 - -#include "util/logging.h" -#include "util/mutex.h" -#include "util/utf.h" - #endif // UTIL_UTIL_H_ diff --git a/util/valgrind.cc b/util/valgrind.cc deleted file mode 100644 index 19ec22e..0000000 --- a/util/valgrind.cc +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -#include "util/util.h" -#ifndef _WIN32 -#include "util/valgrind.h" -#endif - -namespace re2 { - -bool RunningOnValgrind() { -#ifdef RUNNING_ON_VALGRIND - return RUNNING_ON_VALGRIND != 0; -#else - return false; -#endif -} - -} // namespace re2 diff --git a/util/valgrind.h b/util/valgrind.h deleted file mode 100644 index 2200a22..0000000 --- a/util/valgrind.h +++ /dev/null @@ -1,4516 +0,0 @@ -/* -*- c -*- - ---------------------------------------------------------------- - - Notice that the following BSD-style license applies to this one - file (valgrind.h) only. The rest of Valgrind is licensed under the - terms of the GNU General Public License, version 2, unless - otherwise indicated. See the COPYING file in the source - distribution for details. - - ---------------------------------------------------------------- - - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2000-2009 Julian Seward. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. - - 3. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 4. The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. 
- - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------------------------------------------------------- - - Notice that the above BSD-style license applies to this one file - (valgrind.h) only. The entire rest of Valgrind is licensed under - the terms of the GNU General Public License, version 2. See the - COPYING file in the source distribution for details. - - ---------------------------------------------------------------- -*/ - -#ifndef UTIL_VALGRIND_H_ -#define UTIL_VALGRIND_H_ - -/* This file is for inclusion into client (your!) code. - - You can use these macros to manipulate and query Valgrind's - execution inside your own programs. - - The resulting executables will still run without Valgrind, just a - little bit more slowly than they otherwise would, but otherwise - unchanged. When not running on valgrind, each client request - consumes very few (eg. 7) instructions, so the resulting performance - loss is negligible unless you plan to execute client requests - millions of times per second. Nevertheless, if that is still a - problem, you can compile with the NVALGRIND symbol defined (gcc - -DNVALGRIND) so that client requests are not even compiled in. */ - -#include <stdarg.h> - -/* Nb: this file might be included in a file compiled with -ansi. 
So - we can't use C++ style "//" comments nor the "asm" keyword (instead - use "__asm__"). */ - -/* Derive some tags indicating what the target platform is. Note - that in this file we're using the compiler's CPP symbols for - identifying architectures, which are different to the ones we use - within the rest of Valgrind. Note, __powerpc__ is active for both - 32 and 64-bit PPC, whereas __powerpc64__ is only active for the - latter (on Linux, that is). - - Misc note: how to find out what's predefined in gcc by default: - gcc -Wp,-dM somefile.c -*/ -#undef PLAT_ppc64_aix5 -#undef PLAT_ppc32_aix5 -#undef PLAT_x86_darwin -#undef PLAT_amd64_darwin -#undef PLAT_x86_linux -#undef PLAT_amd64_linux -#undef PLAT_ppc32_linux -#undef PLAT_ppc64_linux -#undef PLAT_arm_linux - -#if defined(_AIX) && defined(__64BIT__) -# define PLAT_ppc64_aix5 1 -#elif defined(_AIX) && !defined(__64BIT__) -# define PLAT_ppc32_aix5 1 -#elif defined(__APPLE__) && defined(__i386__) -# define PLAT_x86_darwin 1 -#elif defined(__APPLE__) && defined(__x86_64__) -# define PLAT_amd64_darwin 1 -#elif defined(__linux__) && defined(__i386__) -# define PLAT_x86_linux 1 -#elif defined(__linux__) && defined(__x86_64__) -# define PLAT_amd64_linux 1 -#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__) -# define PLAT_ppc32_linux 1 -#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) -# define PLAT_ppc64_linux 1 -#elif defined(__linux__) && defined(__arm__) -# define PLAT_arm_linux 1 -#else -/* If we're not compiling for our target platform, don't generate - any inline asms. */ -# if !defined(NVALGRIND) -# define NVALGRIND 1 -# endif -#endif - - -/* ------------------------------------------------------------------ */ -/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ -/* in here of use to end-users -- skip to the next section. 
*/ -/* ------------------------------------------------------------------ */ - -#if defined(NVALGRIND) - -/* Define NVALGRIND to completely remove the Valgrind magic sequence - from the compiled code (analogous to NDEBUG's effects on - assert()) */ -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { \ - (_zzq_rlval) = (_zzq_default); \ - } - -#else /* ! NVALGRIND */ - -/* The following defines the magic code sequences which the JITter - spots and handles magically. Don't look too closely at them as - they will rot your brain. - - The assembly code sequences for all architectures is in this one - file. This is because this file must be stand-alone, and we don't - want to have multiple files. - - For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default - value gets put in the return slot, so that everything works when - this is executed not under Valgrind. Args are passed in a memory - block, and so there's no intrinsic limit to the number that could - be passed, but it's currently five. - - The macro args are: - _zzq_rlval result lvalue - _zzq_default default value (result returned when running on real CPU) - _zzq_request request code - _zzq_arg1..5 request params - - The other two macros are used to support function wrapping, and are - a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the - guest's NRADDR pseudo-register and whatever other information is - needed to safely run the call original from the wrapper: on - ppc64-linux, the R2 value at the divert point is also needed. This - information is abstracted into a user-visible type, OrigFn. - - VALGRIND_CALL_NOREDIR_* behaves the same as the following on the - guest, but guarantees that the branch instruction will not be - redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: - branch-and-link-to-r11. 
VALGRIND_CALL_NOREDIR is just text, not a - complete inline asm, since it needs to be combined with more magic - inline asm stuff to be useful. -*/ - -/* ------------------------- x86-{linux,darwin} ---------------- */ - -#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) - -typedef - struct { - unsigned int nraddr; /* where's the code? */ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "roll $3, %%edi ; roll $13, %%edi\n\t" \ - "roll $29, %%edi ; roll $19, %%edi\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { volatile unsigned int _zzq_args[6]; \ - volatile unsigned int _zzq_result; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %EDX = client_request ( %EAX ) */ \ - "xchgl %%ebx,%%ebx" \ - : "=d" (_zzq_result) \ - : "a" (&_zzq_args[0]), "0" (_zzq_default) \ - : "cc", "memory" \ - ); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - volatile unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %EAX = guest_NRADDR */ \ - "xchgl %%ecx,%%ecx" \ - : "=a" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_CALL_NOREDIR_EAX \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* call-noredir *%EAX */ \ - "xchgl %%edx,%%edx\n\t" -#endif /* PLAT_x86_linux || PLAT_x86_darwin */ - -/* ------------------------ amd64-{linux,darwin} --------------- */ - -#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) - -typedef - struct { - unsigned long long int nraddr; /* where's the code? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ - "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { volatile unsigned long long int _zzq_args[6]; \ - volatile unsigned long long int _zzq_result; \ - _zzq_args[0] = (unsigned long long int)(_zzq_request); \ - _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %RDX = client_request ( %RAX ) */ \ - "xchgq %%rbx,%%rbx" \ - : "=d" (_zzq_result) \ - : "a" (&_zzq_args[0]), "0" (_zzq_default) \ - : "cc", "memory" \ - ); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - volatile unsigned long long int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %RAX = guest_NRADDR */ \ - "xchgq %%rcx,%%rcx" \ - : "=a" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_CALL_NOREDIR_RAX \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* call-noredir *%RAX */ \ - "xchgq %%rdx,%%rdx\n\t" -#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */ - -/* ------------------------ ppc32-linux ------------------------ */ - -#if defined(PLAT_ppc32_linux) - -typedef - struct { - unsigned int nraddr; /* where's the code? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ - "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned int _zzq_args[6]; \ - unsigned int _zzq_result; \ - unsigned int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 3,%1\n\t" /*default*/ \ - "mr 4,%2\n\t" /*ptr*/ \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" /*result*/ \ - : "=b" (_zzq_result) \ - : "b" (_zzq_default), "b" (_zzq_ptr) \ - : "cc", "memory", "r3", "r4"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "cc", "memory", "r3" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" -#endif /* PLAT_ppc32_linux */ - -/* ------------------------ ppc64-linux ------------------------ */ - -#if defined(PLAT_ppc64_linux) - -typedef - struct { - unsigned long long int nraddr; /* where's the code? */ - unsigned long long int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ - "rotldi 0,0,61 ; rotldi 0,0,51\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned long long int _zzq_args[6]; \ - register unsigned long long int _zzq_result __asm__("r3"); \ - register unsigned long long int* _zzq_ptr __asm__("r4"); \ - _zzq_args[0] = (unsigned long long int)(_zzq_request); \ - _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1" \ - : "=r" (_zzq_result) \ - : "0" (_zzq_default), "r" (_zzq_ptr) \ - : "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned long long int __addr __asm__("r3"); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2" \ - : "=r" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4" \ - : "=r" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" - -#endif /* PLAT_ppc64_linux */ - -/* ------------------------- arm-linux ------------------------- */ - -#if defined(PLAT_arm_linux) - -typedef - struct { - unsigned int nraddr; /* where's the code? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "mov r12, r12, ror #3 ; mov r12, r12, ror #13 \n\t" \ - "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { volatile unsigned int _zzq_args[6]; \ - volatile unsigned int _zzq_result; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - __asm__ volatile("mov r3, %1\n\t" /*default*/ \ - "mov r4, %2\n\t" /*ptr*/ \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* R3 = client_request ( R4 ) */ \ - "orr r10, r10, r10\n\t" \ - "mov %0, r3" /*result*/ \ - : "=r" (_zzq_result) \ - : "r" (_zzq_default), "r" (&_zzq_args[0]) \ - : "cc","memory", "r3", "r4"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* R3 = guest_NRADDR */ \ - "orr r11, r11, r11\n\t" \ - "mov %0, r3" \ - : "=r" (__addr) \ - : \ - : "cc", "memory", "r3" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R4 */ \ - "orr r12, r12, r12\n\t" - -#endif /* PLAT_arm_linux */ - -/* ------------------------ ppc32-aix5 ------------------------- */ - -#if defined(PLAT_ppc32_aix5) - -typedef - struct { - unsigned int nraddr; /* where's the code? */ - unsigned int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ - "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned int _zzq_args[7]; \ - register unsigned int _zzq_result; \ - register unsigned int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - _zzq_args[6] = (unsigned int)(_zzq_default); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 4,%1\n\t" \ - "lwz 3, 24(4)\n\t" \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" \ - : "=b" (_zzq_result) \ - : "b" (_zzq_ptr) \ - : "r3", "r4", "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" - -#endif /* PLAT_ppc32_aix5 */ - -/* ------------------------ ppc64-aix5 ------------------------- */ - -#if defined(PLAT_ppc64_aix5) - -typedef - struct { - unsigned long long int nraddr; /* where's the code? 
*/ - unsigned long long int r2; /* what tocptr do we need? */ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ - "rotldi 0,0,61 ; rotldi 0,0,51\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned long long int _zzq_args[7]; \ - register unsigned long long int _zzq_result; \ - register unsigned long long int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int long long)(_zzq_request); \ - _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ - _zzq_args[6] = (unsigned int long long)(_zzq_default); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 4,%1\n\t" \ - "ld 3, 48(4)\n\t" \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" \ - : "=b" (_zzq_result) \ - : "b" (_zzq_ptr) \ - : "r3", "r4", "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned long long int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" - -#endif /* PLAT_ppc64_aix5 */ - -/* Insert assembly code for other platforms here... 
*/ - -#endif /* NVALGRIND */ - - -/* ------------------------------------------------------------------ */ -/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ -/* ugly. It's the least-worst tradeoff I can think of. */ -/* ------------------------------------------------------------------ */ - -/* This section defines magic (a.k.a appalling-hack) macros for doing - guaranteed-no-redirection macros, so as to get from function - wrappers to the functions they are wrapping. The whole point is to - construct standard call sequences, but to do the call itself with a - special no-redirect call pseudo-instruction that the JIT - understands and handles specially. This section is long and - repetitious, and I can't see a way to make it shorter. - - The naming scheme is as follows: - - CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} - - 'W' stands for "word" and 'v' for "void". Hence there are - different macros for calling arity 0, 1, 2, 3, 4, etc, functions, - and for each, the possibility of returning a word-typed result, or - no result. -*/ - -/* Use these to write the name of your wrapper. NOTE: duplicates - VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */ - -/* Use an extra level of macroisation so as to ensure the soname/fnname - args are fully macro-expanded before pasting them together. */ -#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd - -#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ - VG_CONCAT4(_vgwZU_,soname,_,fnname) - -#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ - VG_CONCAT4(_vgwZZ_,soname,_,fnname) - -/* Use this macro from within a wrapper function to collect the - context (address and possibly other info) of the original function. - Once you have that you can then use it in one of the CALL_FN_ - macros. The type of the argument _lval is OrigFn. */ -#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) - -/* Derivatives of the main macros below, for calling functions - returning void. 
*/ - -#define CALL_FN_v_v(fnptr) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_v(_junk,fnptr); } while (0) - -#define CALL_FN_v_W(fnptr, arg1) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_W(_junk,fnptr,arg1); } while (0) - -#define CALL_FN_v_WW(fnptr, arg1,arg2) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) - -#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) - -#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0) - -#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0) - -#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0) - -#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0) - -/* ------------------------- x86-{linux,darwin} ---------------- */ - -#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) - -/* These regs are trashed by the hidden call. No need to mention eax - as gcc can already see that, plus causes gcc to bomb. */ -#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" - -/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned - long) == 4. 
*/ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $4, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $8, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); 
\ - __asm__ volatile( \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $12, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $16, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - __asm__ volatile( \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $20, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - 
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $24, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $28, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { 
\ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $32, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $36, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ 
- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $40, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - 
_argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "pushl 44(%%eax)\n\t" \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $44, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - _argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "pushl 48(%%eax)\n\t" \ - "pushl 44(%%eax)\n\t" \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $48, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* 
PLAT_x86_linux || PLAT_x86_darwin */ - -/* ------------------------ amd64-{linux,darwin} --------------- */ - -#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) - -/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ - "rdi", "r8", "r9", "r10", "r11" - -/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned - long) == 8. */ - -/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ - macros. In order not to trash the stack redzone, we need to drop - %rsp by 128 before the hidden call, and restore afterwards. The - nastyness is that it is only by luck that the stack still appears - to be unwindable during the hidden call - since then the behaviour - of any routine using this macro does not match what the CFI data - says. Sigh. - - Why is this important? Imagine that a wrapper has a stack - allocated local, and passes to the hidden call, a pointer to it. - Because gcc does not know about the hidden call, it may allocate - that local in the redzone. Unfortunately the hidden call may then - trash it before it comes to use it. So we must step clear of the - redzone, for the duration of the hidden call, to make it safe. - - Probably the same problem afflicts the other redzone-style ABIs too - (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is - self describing (none of this CFI nonsense) so at least messing - with the stack pointer doesn't give a danger of non-unwindable - stack. 
*/ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned 
long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 
8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - "addq $128,%%rsp\n\t" \ - VALGRIND_CALL_NOREDIR_RAX \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - 
"movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $8, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $16, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ 
- _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $24, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $32, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = 
(__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 88(%%rax)\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $40, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned 
long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - _argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 96(%%rax)\n\t" \ - "pushq 88(%%rax)\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $48, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */ - -/* ------------------------ ppc32-linux ------------------------ */ - -#if defined(PLAT_ppc32_linux) - -/* This is useful for finding out about the on-stack stuff: - - extern int f9 ( int,int,int,int,int,int,int,int,int ); - extern int f10 ( int,int,int,int,int,int,int,int,int,int ); - extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); - extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); - - int g9 ( void ) { - return f9(11,22,33,44,55,66,77,88,99); - } - int g10 ( void ) { - return f10(11,22,33,44,55,66,77,88,99,110); - } - int g11 ( void ) { - return f11(11,22,33,44,55,66,77,88,99,110,121); - } - int g12 ( void ) { - return f12(11,22,33,44,55,66,77,88,99,110,121,132); - } -*/ - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. 
*/ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* These CALL_FN_ macros assume that on ppc32-linux, - sizeof(unsigned long) == 4. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = 
(__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - __asm__ volatile( 
\ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 
6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned 
long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-16\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,16\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-16\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,16\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - _argvec[11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-32\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,16(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,32\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned 
long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - _argvec[11] = (unsigned long)arg11; \ - _argvec[12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-32\n\t" \ - /* arg12 */ \ - "lwz 3,48(11)\n\t" \ - "stw 3,20(1)\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,16(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,32\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc32_linux */ - -/* ------------------------ ppc64-linux ------------------------ */ - -#if defined(PLAT_ppc64_linux) - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned - long) == 8. 
*/ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ 
volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = 
(unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* 
_argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 
*/ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long 
_argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-128\n\t" /* expand stack frame */ \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,128" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned 
long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-128\n\t" /* expand stack frame */ \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,128" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = 
(unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-144\n\t" /* expand stack frame */ \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,144" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ 
- "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-144\n\t" /* expand stack frame */ \ - /* arg12 */ \ - "ld 3,96(11)\n\t" \ - "std 3,136(1)\n\t" \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,144" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc64_linux */ - -/* ------------------------- arm-linux ------------------------- */ - -#if defined(PLAT_arm_linux) - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14" - -/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned - long) == 4. 
*/ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "mov %0, r0\n" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "mov %0, r0\n" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "mov %0, r0\n" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - 
_argvec[3] = (unsigned long)(arg3); \ - __asm__ volatile( \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "mov %0, r0\n" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - __asm__ volatile( \ - "ldr r0, [%1, #20] \n\t" \ - "push {r0} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #4 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - 
: /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" \ - "push {r0, r1} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #8 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" \ - "ldr r2, [%1, #28] \n\t" \ - "push {r0, r1, r2} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #12 \n\t" \ - "mov %0, r0" \ - : 
/*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" \ - "ldr r2, [%1, #28] \n\t" \ - "ldr r3, [%1, #32] \n\t" \ - "push {r0, r1, r2, r3} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #16 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" 
\ - "ldr r2, [%1, #28] \n\t" \ - "ldr r3, [%1, #32] \n\t" \ - "ldr r4, [%1, #36] \n\t" \ - "push {r0, r1, r2, r3, r4} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #20 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "ldr r0, [%1, #40] \n\t" \ - "push {r0} \n\t" \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" \ - "ldr r2, [%1, #28] \n\t" \ - "ldr r3, [%1, #32] \n\t" \ - "ldr r4, [%1, #36] \n\t" \ - "push {r0, r1, r2, r3, r4} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #24 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11) \ - do { \ - volatile OrigFn _orig = 
(orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "ldr r0, [%1, #40] \n\t" \ - "ldr r1, [%1, #44] \n\t" \ - "push {r0, r1} \n\t" \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" \ - "ldr r2, [%1, #28] \n\t" \ - "ldr r3, [%1, #32] \n\t" \ - "ldr r4, [%1, #36] \n\t" \ - "push {r0, r1, r2, r3, r4} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #28 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory",__CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - 
_argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "ldr r0, [%1, #40] \n\t" \ - "ldr r1, [%1, #44] \n\t" \ - "ldr r2, [%1, #48] \n\t" \ - "push {r0, r1, r2} \n\t" \ - "ldr r0, [%1, #20] \n\t" \ - "ldr r1, [%1, #24] \n\t" \ - "ldr r2, [%1, #28] \n\t" \ - "ldr r3, [%1, #32] \n\t" \ - "ldr r4, [%1, #36] \n\t" \ - "push {r0, r1, r2, r3, r4} \n\t" \ - "ldr r0, [%1, #4] \n\t" \ - "ldr r1, [%1, #8] \n\t" \ - "ldr r2, [%1, #12] \n\t" \ - "ldr r3, [%1, #16] \n\t" \ - "ldr r4, [%1] \n\t" /* target->r4 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ - "add sp, sp, #32 \n\t" \ - "mov %0, r0" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "0" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_arm_linux */ - -/* ------------------------ ppc32-aix5 ------------------------- */ - -#if defined(PLAT_ppc32_aix5) - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* Expand the stack frame, copying enough info that unwinding - still works. Trashes r3. */ - -#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ - "addi 1,1,-" #_n_fr "\n\t" \ - "lwz 3," #_n_fr "(1)\n\t" \ - "stw 3,0(1)\n\t" - -#define VG_CONTRACT_FRAME_BY(_n_fr) \ - "addi 1,1," #_n_fr "\n\t" - -/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned - long) == 4. 
*/ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned 
long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile 
unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 
11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - 
_argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 
6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(64) \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 
2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(64) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(64) \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,60(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(64) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(72) \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,64(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,60(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(72) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) 
- -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(72) \ - /* arg12 */ \ - "lwz 3,48(11)\n\t" \ - "stw 3,68(1)\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,64(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,60(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(72) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = 
(__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc32_aix5 */ - -/* ------------------------ ppc64-aix5 ------------------------- */ - -#if defined(PLAT_ppc64_aix5) - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* Expand the stack frame, copying enough info that unwinding - still works. Trashes r3. */ - -#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ - "addi 1,1,-" #_n_fr "\n\t" \ - "ld 3," #_n_fr "(1)\n\t" \ - "std 3,0(1)\n\t" - -#define VG_CONTRACT_FRAME_BY(_n_fr) \ - "addi 1,1," #_n_fr "\n\t" - -/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned - long) == 8. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned 
long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = 
(unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile 
unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" 
/* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); 
\ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned 
long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(128) \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(128) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's 
tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(128) \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(128) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(144) \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 
3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(144) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(144) \ - /* arg12 */ \ - "ld 3,96(11)\n\t" \ - "std 3,136(1)\n\t" \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" 
\ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(144) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc64_aix5 */ - - -/* ------------------------------------------------------------------ */ -/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ -/* */ -/* ------------------------------------------------------------------ */ - -/* Some request codes. There are many more of these, but most are not - exposed to end-user view. These are the public ones, all of the - form 0x1000 + small_number. - - Core ones are in the range 0x00000000--0x0000ffff. The non-public - ones start at 0x2000. -*/ - -/* These macros are used by tools -- they must be public, but don't - embed them into other programs. */ -#define VG_USERREQ_TOOL_BASE(a,b) \ - ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) -#define VG_IS_TOOL_USERREQ(a, b, v) \ - (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) - -/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! - This enum comprises an ABI exported by Valgrind to programs - which use client requests. DO NOT CHANGE THE ORDER OF THESE - ENTRIES, NOR DELETE ANY -- add new ones at the end. 
*/ -typedef - enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, - VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, - - /* These allow any function to be called from the simulated - CPU but run on the real CPU. Nb: the first arg passed to - the function is always the ThreadId of the running - thread! So CLIENT_CALL0 actually requires a 1 arg - function, etc. */ - VG_USERREQ__CLIENT_CALL0 = 0x1101, - VG_USERREQ__CLIENT_CALL1 = 0x1102, - VG_USERREQ__CLIENT_CALL2 = 0x1103, - VG_USERREQ__CLIENT_CALL3 = 0x1104, - - /* Can be useful in regression testing suites -- eg. can - send Valgrind's output to /dev/null and still count - errors. */ - VG_USERREQ__COUNT_ERRORS = 0x1201, - - /* These are useful and can be interpreted by any tool that - tracks malloc() et al, by using vg_replace_malloc.c. */ - VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, - VG_USERREQ__FREELIKE_BLOCK = 0x1302, - /* Memory pool support. */ - VG_USERREQ__CREATE_MEMPOOL = 0x1303, - VG_USERREQ__DESTROY_MEMPOOL = 0x1304, - VG_USERREQ__MEMPOOL_ALLOC = 0x1305, - VG_USERREQ__MEMPOOL_FREE = 0x1306, - VG_USERREQ__MEMPOOL_TRIM = 0x1307, - VG_USERREQ__MOVE_MEMPOOL = 0x1308, - VG_USERREQ__MEMPOOL_CHANGE = 0x1309, - VG_USERREQ__MEMPOOL_EXISTS = 0x130a, - - /* Allow printfs to valgrind log. */ - /* The first two pass the va_list argument by value, which - assumes it is the same size as or smaller than a UWord, - which generally isn't the case. Hence are deprecated. - The second two pass the vargs by reference and so are - immune to this problem. */ - /* both :: char* fmt, va_list vargs (DEPRECATED) */ - VG_USERREQ__PRINTF = 0x1401, - VG_USERREQ__PRINTF_BACKTRACE = 0x1402, - /* both :: char* fmt, va_list* vargs */ - VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403, - VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404, - - /* Stack support. 
*/ - VG_USERREQ__STACK_REGISTER = 0x1501, - VG_USERREQ__STACK_DEREGISTER = 0x1502, - VG_USERREQ__STACK_CHANGE = 0x1503, - - /* Wine support */ - VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601 - } Vg_ClientRequest; - -#if !defined(__GNUC__) -# define __extension__ /* */ -#endif - -/* Returns the number of Valgrinds this code is running under. That - is, 0 if running natively, 1 if running under Valgrind, 2 if - running under Valgrind which is running under another Valgrind, - etc. */ -#define RUNNING_ON_VALGRIND __extension__ \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \ - VG_USERREQ__RUNNING_ON_VALGRIND, \ - 0, 0, 0, 0, 0); \ - _qzz_res; \ - }) - - -/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + - _qzz_len - 1]. Useful if you are debugging a JITter or some such, - since it provides a way to make sure valgrind will retranslate the - invalidated area. Returns no value. */ -#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__DISCARD_TRANSLATIONS, \ - _qzz_addr, _qzz_len, 0, 0, 0); \ - } - - -/* These requests are for getting Valgrind itself to print something. - Possibly with a backtrace. This is a really ugly hack. The return value - is the number of characters printed, excluding the "**<pid>** " part at the - start and the backtrace (if present). */ - -#if defined(NVALGRIND) - -# define VALGRIND_PRINTF(...) -# define VALGRIND_PRINTF_BACKTRACE(...) - -#else /* NVALGRIND */ - -/* Modern GCC will optimize the static routine out if unused, - and unused attribute will shut down warnings about it. */ -static int VALGRIND_PRINTF(const char *format, ...) - __attribute__((format(__printf__, 1, 2), __unused__)); -static int -VALGRIND_PRINTF(const char *format, ...) 
-{ - unsigned long _qzz_res; - va_list vargs; - va_start(vargs, format); - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, - VG_USERREQ__PRINTF_VALIST_BY_REF, - (unsigned long)format, - (unsigned long)&vargs, - 0, 0, 0); - va_end(vargs); - return (int)_qzz_res; -} - -static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) - __attribute__((format(__printf__, 1, 2), __unused__)); -static int -VALGRIND_PRINTF_BACKTRACE(const char *format, ...) -{ - unsigned long _qzz_res; - va_list vargs; - va_start(vargs, format); - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, - VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF, - (unsigned long)format, - (unsigned long)&vargs, - 0, 0, 0); - va_end(vargs); - return (int)_qzz_res; -} - -#endif /* NVALGRIND */ - - -/* These requests allow control to move from the simulated CPU to the - real CPU, calling an arbitary function. - - Note that the current ThreadId is inserted as the first argument. - So this call: - - VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) - - requires f to have this signature: - - Word f(Word tid, Word arg1, Word arg2) - - where "Word" is a word-sized type. - - Note that these client requests are not entirely reliable. For example, - if you call a function with them that subsequently calls printf(), - there's a high chance Valgrind will crash. Generally, your prospects of - these working are made higher if the called function does not refer to - any global variables, and does not refer to any libc or other functions - (printf et al). Any kind of entanglement with libc or dynamic linking is - likely to have a bad outcome, for tricky reasons which we've grappled - with a lot in the past. 
-*/ -#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL0, \ - _qyy_fn, \ - 0, 0, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL1, \ - _qyy_fn, \ - _qyy_arg1, 0, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL2, \ - _qyy_fn, \ - _qyy_arg1, _qyy_arg2, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL3, \ - _qyy_fn, \ - _qyy_arg1, _qyy_arg2, \ - _qyy_arg3, 0); \ - _qyy_res; \ - }) - - -/* Counts the number of errors that have been recorded by a tool. Nb: - the tool must record the errors with VG_(maybe_record_error)() or - VG_(unique_error)() for them to be counted. */ -#define VALGRIND_COUNT_ERRORS \ - __extension__ \ - ({unsigned int _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__COUNT_ERRORS, \ - 0, 0, 0, 0, 0); \ - _qyy_res; \ - }) - -/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing - when heap blocks are allocated in order to give accurate results. This - happens automatically for the standard allocator functions such as - malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete, - delete[], etc. - - But if your program uses a custom allocator, this doesn't automatically - happen, and Valgrind will not do as well. 
For example, if you allocate - superblocks with mmap() and then allocates chunks of the superblocks, all - Valgrind's observations will be at the mmap() level and it won't know that - the chunks should be considered separate entities. In Memcheck's case, - that means you probably won't get heap block overrun detection (because - there won't be redzones marked as unaddressable) and you definitely won't - get any leak detection. - - The following client requests allow a custom allocator to be annotated so - that it can be handled accurately by Valgrind. - - VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated - by a malloc()-like function. For Memcheck (an illustrative case), this - does two things: - - - It records that the block has been allocated. This means any addresses - within the block mentioned in error messages will be - identified as belonging to the block. It also means that if the block - isn't freed it will be detected by the leak checker. - - - It marks the block as being addressable and undefined (if 'is_zeroed' is - not set), or addressable and defined (if 'is_zeroed' is set). This - controls how accesses to the block by the program are handled. - - 'addr' is the start of the usable block (ie. after any - redzone), 'sizeB' is its size. 'rzB' is the redzone size if the allocator - can apply redzones -- these are blocks of padding at the start and end of - each block. Adding redzones is recommended as it makes it much more likely - Valgrind will spot block overruns. `is_zeroed' indicates if the memory is - zeroed (or filled with another predictable value), as is the case for - calloc(). - - VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a - heap block -- that will be used by the client program -- is allocated. 
- It's best to put it at the outermost level of the allocator if possible; - for example, if you have a function my_alloc() which calls - internal_alloc(), and the client request is put inside internal_alloc(), - stack traces relating to the heap block will contain entries for both - my_alloc() and internal_alloc(), which is probably not what you want. - - For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out - custom blocks from within a heap block, B, that has been allocated with - malloc/calloc/new/etc, then block B will be *ignored* during leak-checking - -- the custom blocks will take precedence. - - VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK. For - Memcheck, it does two things: - - - It records that the block has been deallocated. This assumes that the - block was annotated as having been allocated via - VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued. - - - It marks the block as being unaddressable. - - VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a - heap block is deallocated. - - In many cases, these two client requests will not be enough to get your - allocator working well with Memcheck. More specifically, if your allocator - writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call - will be necessary to mark the memory as addressable just before the zeroing - occurs, otherwise you'll get a lot of invalid write errors. For example, - you'll need to do this if your allocator recycles freed blocks, but it - zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK). - Alternatively, if your allocator reuses freed blocks for allocator-internal - data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary. - - Really, what's happening is a blurring of the lines between the client - program and the allocator... 
after VALGRIND_FREELIKE_BLOCK is called, the - memory should be considered unaddressable to the client program, but the - allocator knows more than the rest of the client program and so may be able - to safely access it. Extra client requests are necessary for Valgrind to - understand the distinction between the allocator and the rest of the - program. - - Note: there is currently no VALGRIND_REALLOCLIKE_BLOCK client request; it - has to be emulated with MALLOCLIKE/FREELIKE and memory copying. - - Ignored if addr == 0. -*/ -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MALLOCLIKE_BLOCK, \ - addr, sizeB, rzB, is_zeroed, 0); \ - } - -/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details. - Ignored if addr == 0. -*/ -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__FREELIKE_BLOCK, \ - addr, rzB, 0, 0, 0); \ - } - -/* Create a memory pool. */ -#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__CREATE_MEMPOOL, \ - pool, rzB, is_zeroed, 0, 0); \ - } - -/* Destroy a memory pool. */ -#define VALGRIND_DESTROY_MEMPOOL(pool) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__DESTROY_MEMPOOL, \ - pool, 0, 0, 0, 0); \ - } - -/* Associate a piece of memory with a memory pool. */ -#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_ALLOC, \ - pool, addr, size, 0, 0); \ - } - -/* Disassociate a piece of memory from a memory pool. */ -#define VALGRIND_MEMPOOL_FREE(pool, addr) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_FREE, \ - pool, addr, 0, 0, 0); \ - } - -/* Disassociate any pieces outside a particular range. 
*/ -#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_TRIM, \ - pool, addr, size, 0, 0); \ - } - -/* Resize and/or move a piece associated with a memory pool. */ -#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MOVE_MEMPOOL, \ - poolA, poolB, 0, 0, 0); \ - } - -/* Resize and/or move a piece associated with a memory pool. */ -#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_CHANGE, \ - pool, addrA, addrB, size, 0); \ - } - -/* Return 1 if a mempool exists, else 0. */ -#define VALGRIND_MEMPOOL_EXISTS(pool) \ - __extension__ \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_EXISTS, \ - pool, 0, 0, 0, 0); \ - _qzz_res; \ - }) - -/* Mark a piece of memory as being a stack. Returns a stack id. */ -#define VALGRIND_STACK_REGISTER(start, end) \ - __extension__ \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_REGISTER, \ - start, end, 0, 0, 0); \ - _qzz_res; \ - }) - -/* Unmark the piece of memory associated with a stack id as being a - stack. */ -#define VALGRIND_STACK_DEREGISTER(id) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_DEREGISTER, \ - id, 0, 0, 0, 0); \ - } - -/* Change the start and end address of the stack id. */ -#define VALGRIND_STACK_CHANGE(id, start, end) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_CHANGE, \ - id, start, end, 0, 0); \ - } - -/* Load PDB debug info for Wine PE image_map. 
*/ -#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__LOAD_PDB_DEBUGINFO, \ - fd, ptr, total_size, delta, 0); \ - } - - -#undef PLAT_x86_linux -#undef PLAT_amd64_linux -#undef PLAT_ppc32_linux -#undef PLAT_ppc64_linux -#undef PLAT_arm_linux -#undef PLAT_ppc32_aix5 -#undef PLAT_ppc64_aix5 - -#endif // UTIL_VALGRIND_H_ |