diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2019-09-25 15:35:57 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2019-09-25 15:35:57 +0900 |
commit | de99325ffe35c346d71d37e5f60b67e29313c1f2 (patch) | |
tree | 937eb9611b8c5952744ba895f711b70b0c565b58 | |
parent | 07d538b47cc1d986c87b748fe5961beb1e8d2fbd (diff) | |
download | re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.gz re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.bz2 re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.zip |
Imported Upstream version 20190301upstream/20190301
-rw-r--r-- | .travis.yml | 11 | ||||
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | kokoro/bazel.sh | 26 | ||||
-rwxr-xr-x | kokoro/cmake.sh | 25 | ||||
-rwxr-xr-x | kokoro/macos-bazel.sh | 26 | ||||
-rwxr-xr-x | kokoro/macos-cmake.sh | 14 | ||||
-rwxr-xr-x | kokoro/ubuntu-bazel.sh | 26 | ||||
-rwxr-xr-x | kokoro/windows-bazel.bat | 32 | ||||
-rwxr-xr-x | kokoro/windows-cmake.bat | 20 | ||||
-rw-r--r-- | re2/fuzzing/re2_fuzzer.cc | 18 | ||||
-rw-r--r-- | re2/nfa.cc | 15 | ||||
-rw-r--r-- | re2/onepass.cc | 8 | ||||
-rw-r--r-- | re2/parse.cc | 18 | ||||
-rw-r--r-- | re2/prog.cc | 2 | ||||
-rw-r--r-- | re2/prog.h | 2 | ||||
-rw-r--r-- | re2/re2.cc | 2 | ||||
-rw-r--r-- | re2/re2.h | 54 | ||||
-rw-r--r-- | re2/stringpiece.h | 12 | ||||
-rw-r--r-- | util/sparse_array.h | 286 | ||||
-rw-r--r-- | util/sparse_set.h | 74 |
20 files changed, 258 insertions, 415 deletions
diff --git a/.travis.yml b/.travis.yml index 674cda5..0ddc4a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -154,6 +154,17 @@ matrix: - clang-7 env: - MATRIX_EVAL="CC=clang-7 CXX=clang++-7" + - os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + packages: + - clang-8 + env: + - MATRIX_EVAL="CC=clang-8 CXX=clang++-8" before_install: - eval "${MATRIX_EVAL}" @@ -28,7 +28,7 @@ under the BSD-style license found in the LICENSE file. RE2's native language is C++. A C wrapper is at https://github.com/marcomaggi/cre2/. -An Erlang wrapper is at https://github.com/tuncer/re2/ and on Hex (hex.pm). +An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm). An Inferno wrapper is at https://github.com/powerman/inferno-re2/. A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com). An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org). diff --git a/kokoro/bazel.sh b/kokoro/bazel.sh new file mode 100755 index 0000000..6f25982 --- /dev/null +++ b/kokoro/bazel.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -eux + +cd git/re2 + +bazel clean +bazel build --compilation_mode=dbg -- //... +bazel test --compilation_mode=dbg --test_output=errors -- //... \ + -//:dfa_test \ + -//:exhaustive1_test \ + -//:exhaustive2_test \ + -//:exhaustive3_test \ + -//:exhaustive_test \ + -//:random_test + +bazel clean +bazel build --compilation_mode=opt -- //... +bazel test --compilation_mode=opt --test_output=errors -- //... \ + -//:dfa_test \ + -//:exhaustive1_test \ + -//:exhaustive2_test \ + -//:exhaustive3_test \ + -//:exhaustive_test \ + -//:random_test + +exit 0 diff --git a/kokoro/cmake.sh b/kokoro/cmake.sh new file mode 100755 index 0000000..999fbfe --- /dev/null +++ b/kokoro/cmake.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -eux + +cd git/re2 + +case "${KOKORO_JOB_NAME}" in + */windows-*) + CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64') + ;; + *) + CMAKE_G_A_FLAGS=() + # Work around a bug in older versions of bash. :/ + set +u + ;; +esac + +cmake -D CMAKE_BUILD_TYPE=Debug "${CMAKE_G_A_FLAGS[@]}" . +cmake --build . --config Debug --clean-first +ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random' + +cmake -D CMAKE_BUILD_TYPE=Release "${CMAKE_G_A_FLAGS[@]}" . +cmake --build . --config Release --clean-first +ctest -C Release --output-on-failure -E 'dfa|exhaustive|random' + +exit 0 diff --git a/kokoro/macos-bazel.sh b/kokoro/macos-bazel.sh index 6f25982..e43c852 100755 --- a/kokoro/macos-bazel.sh +++ b/kokoro/macos-bazel.sh @@ -1,26 +1,4 @@ #!/bin/bash set -eux - -cd git/re2 - -bazel clean -bazel build --compilation_mode=dbg -- //... -bazel test --compilation_mode=dbg --test_output=errors -- //... \ - -//:dfa_test \ - -//:exhaustive1_test \ - -//:exhaustive2_test \ - -//:exhaustive3_test \ - -//:exhaustive_test \ - -//:random_test - -bazel clean -bazel build --compilation_mode=opt -- //... -bazel test --compilation_mode=opt --test_output=errors -- //... \ - -//:dfa_test \ - -//:exhaustive1_test \ - -//:exhaustive2_test \ - -//:exhaustive3_test \ - -//:exhaustive_test \ - -//:random_test - -exit 0 +bash git/re2/kokoro/bazel.sh +exit $? diff --git a/kokoro/macos-cmake.sh b/kokoro/macos-cmake.sh index dbe94cb..ef4b7dc 100755 --- a/kokoro/macos-cmake.sh +++ b/kokoro/macos-cmake.sh @@ -1,14 +1,4 @@ #!/bin/bash set -eux - -cd git/re2 - -cmake -D CMAKE_BUILD_TYPE=Debug . -cmake --build . --config Debug --clean-first -ctest -C Debug --output-on-failure -E dfa\|exhaustive\|random - -cmake -D CMAKE_BUILD_TYPE=Release . -cmake --build . --config Release --clean-first -ctest -C Release --output-on-failure -E dfa\|exhaustive\|random - -exit 0 +bash git/re2/kokoro/cmake.sh +exit $? diff --git a/kokoro/ubuntu-bazel.sh b/kokoro/ubuntu-bazel.sh index 6f25982..e43c852 100755 --- a/kokoro/ubuntu-bazel.sh +++ b/kokoro/ubuntu-bazel.sh @@ -1,26 +1,4 @@ #!/bin/bash set -eux - -cd git/re2 - -bazel clean -bazel build --compilation_mode=dbg -- //... -bazel test --compilation_mode=dbg --test_output=errors -- //... \ - -//:dfa_test \ - -//:exhaustive1_test \ - -//:exhaustive2_test \ - -//:exhaustive3_test \ - -//:exhaustive_test \ - -//:random_test - -bazel clean -bazel build --compilation_mode=opt -- //... -bazel test --compilation_mode=opt --test_output=errors -- //... \ - -//:dfa_test \ - -//:exhaustive1_test \ - -//:exhaustive2_test \ - -//:exhaustive3_test \ - -//:exhaustive_test \ - -//:random_test - -exit 0 +bash git/re2/kokoro/bazel.sh +exit $? diff --git a/kokoro/windows-bazel.bat b/kokoro/windows-bazel.bat index 9ac3d5b..283f8d2 100755 --- a/kokoro/windows-bazel.bat +++ b/kokoro/windows-bazel.bat @@ -1,30 +1,2 @@ -CD git/re2 ^ - || EXIT /B 1 - -bazel clean ^ - || EXIT /B 1 -bazel build --compilation_mode=dbg -- //... ^ - || EXIT /B 1 -bazel test --compilation_mode=dbg --test_output=errors -- //... ^ - -//:dfa_test ^ - -//:exhaustive1_test ^ - -//:exhaustive2_test ^ - -//:exhaustive3_test ^ - -//:exhaustive_test ^ - -//:random_test ^ - || EXIT /B 1 - -bazel clean ^ - || EXIT /B 1 -bazel build --compilation_mode=opt -- //... ^ - || EXIT /B 1 -bazel test --compilation_mode=opt --test_output=errors -- //... ^ - -//:dfa_test ^ - -//:exhaustive1_test ^ - -//:exhaustive2_test ^ - -//:exhaustive3_test ^ - -//:exhaustive_test ^ - -//:random_test ^ - || EXIT /B 1 - -EXIT /B 0 +bash git/re2/kokoro/bazel.sh +EXIT /B %ERRORLEVEL% diff --git a/kokoro/windows-cmake.bat b/kokoro/windows-cmake.bat index e926d88..77a4db9 100755 --- a/kokoro/windows-cmake.bat +++ b/kokoro/windows-cmake.bat @@ -1,18 +1,2 @@ -CD git/re2 ^ - || EXIT /B 1 - -cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 14 2015" -A x64 . ^ - || EXIT /B 1 -cmake --build . --config Debug --clean-first ^ - || EXIT /B 1 -ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random ^ - || EXIT /B 1 - -cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 14 2015" -A x64 . ^ - || EXIT /B 1 -cmake --build . --config Release --clean-first ^ - || EXIT /B 1 -ctest -C Release --output-on-failure -E dfa^|exhaustive^|random ^ - || EXIT /B 1 - -EXIT /B 0 +bash git/re2/kokoro/cmake.sh +EXIT /B %ERRORLEVEL% diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index 084b5c0..5e5d324 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -106,26 +106,32 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { if (size == 0 || size > 999) return 0; - // Crudely limit the use of ., \p and \P. - // Otherwise, we will waste time on inputs that have long runs of Unicode + // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W. + // Otherwise, we will waste time on inputs that have long runs of various // character classes. The fuzzer has shown itself to be easily capable of // generating such patterns that fall within the other limits, but result // in timeouts nonetheless. The marginal cost is high - even more so when // counted repetition is involved - whereas the marginal benefit is zero. - int dot = 0; - int backslash_p = 0; + // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain. + int char_class = 0; + int backslash_p = 0; // very expensive, so handle specially for (size_t i = 0; i < size; i++) { if (data[i] == '.') - dot++; + char_class++; if (data[i] != '\\') continue; i++; if (i >= size) break; + if (data[i] == 'p' || data[i] == 'P' || + data[i] == 'd' || data[i] == 'D' || + data[i] == 's' || data[i] == 'S' || + data[i] == 'w' || data[i] == 'W') + char_class++; if (data[i] == 'p' || data[i] == 'P') backslash_p++; } - if (dot > 99) + if (char_class > 9) return 0; if (backslash_p > 1) return 0; @@ -238,8 +238,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, // or we might not. Even if not, it is necessary to have it, // so that we don't revisit id0 during the recursion. q->set_new(id, NULL); - - Thread** tp = &q->find(id)->second; + Thread** tp = &q->get_existing(id); int j; Thread* t; Prog::Inst* ip = prog_->inst(id); @@ -329,7 +328,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, nextq->clear(); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { - Thread* t = i->second; + Thread* t = i->value(); if (t == NULL) continue; @@ -364,7 +363,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, Decref(t); for (++i; i != runq->end(); ++i) - Decref(i->second); + Decref(i->value()); runq->clear(); if (ip->greedy(prog_)) return ip->out1(); @@ -403,7 +402,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, // rest of the current Threadq. Decref(t); for (++i; i != runq->end(); ++i) - Decref(i->second); + Decref(i->value()); runq->clear(); return 0; } @@ -505,7 +504,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, fprintf(stderr, "%c:", c); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { - Thread* t = i->second; + Thread* t = i->value(); if (t == NULL) continue; fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str()); @@ -586,7 +585,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, } for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) - Decref(i->second); + Decref(i->value()); if (matched_) { for (int i = 0; i < nsubmatch; i++) @@ -697,7 +696,7 @@ void Prog::Fanout(SparseArray<int>* fanout) { fanout->clear(); fanout->set_new(start(), 0); for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) { - int* count = &i->second; + int* count = &i->value(); reachable.clear(); reachable.insert(i->index()); for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) { diff --git a/re2/onepass.cc b/re2/onepass.cc index 7d39290..edd2c48 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -244,7 +244,7 @@ bool Prog::SearchOnePass(const StringPiece& text, if (anchor_end()) kind = kFullMatch; - uint8_t* nodes = onepass_nodes_; + uint8_t* nodes = onepass_nodes_.data(); int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); // start() is always mapped to the zeroth OneState. OneState* state = IndexToNode(nodes, statesize, 0); @@ -383,7 +383,7 @@ struct InstCond { // Constructs and saves corresponding one-pass NFA on success. bool Prog::IsOnePass() { if (did_onepass_) - return onepass_nodes_ != NULL; + return onepass_nodes_.data() != NULL; did_onepass_ = true; if (start() == 0) // no match @@ -612,8 +612,8 @@ bool Prog::IsOnePass() { } dfa_mem_ -= nalloc*statesize; - onepass_nodes_ = new uint8_t[nalloc*statesize]; - memmove(onepass_nodes_, nodes.data(), nalloc*statesize); + onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize); + memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize); return true; fail: diff --git a/re2/parse.cc b/re2/parse.cc index 354eafe..c8dea7e 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -900,7 +900,7 @@ struct Frame { int nsub; int round; std::vector<Splice> splices; - std::vector<Splice>::iterator spliceiter; + int spliceidx; }; // Bundled into a class for friend access to Regexp without needing to declare @@ -938,15 +938,15 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { auto& nsub = stk.back().nsub; auto& round = stk.back().round; auto& splices = stk.back().splices; - auto& spliceiter = stk.back().spliceiter; + auto& spliceidx = stk.back().spliceidx; if (splices.empty()) { // Advance to the next round of factoring. Note that this covers // the initialised state: when splices is empty and round is 0. round++; - } else if (spliceiter != splices.end()) { + } else if (spliceidx < static_cast<int>(splices.size())) { // We have at least one more Splice to factor. Recurse logically. - stk.emplace_back(spliceiter->sub, spliceiter->nsub); + stk.emplace_back(splices[spliceidx].sub, splices[spliceidx].nsub); continue; } else { // We have no more Splices to factor. Apply them. @@ -1007,8 +1007,8 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { // (Note that references will be invalidated!) int nsuffix = nsub; stk.pop_back(); - stk.back().spliceiter->nsuffix = nsuffix; - ++stk.back().spliceiter; + stk.back().splices[stk.back().spliceidx].nsuffix = nsuffix; + ++stk.back().spliceidx; continue; } default: @@ -1016,11 +1016,11 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { break; } - // Set spliceiter depending on whether we have Splices to factor. + // Set spliceidx depending on whether we have Splices to factor. if (splices.empty() || round == 3) { - spliceiter = splices.end(); + spliceidx = static_cast<int>(splices.size()); } else { - spliceiter = splices.begin(); + spliceidx = 0; } } } diff --git a/re2/prog.cc b/re2/prog.cc index fa03af9..9729aa4 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -112,7 +112,6 @@ Prog::Prog() first_byte_(-1), flags_(0), list_count_(0), - onepass_nodes_(NULL), dfa_mem_(0), dfa_first_(NULL), dfa_longest_(NULL) { @@ -121,7 +120,6 @@ Prog::Prog() Prog::~Prog() { DeleteDFA(dfa_longest_); DeleteDFA(dfa_first_); - delete[] onepass_nodes_; } typedef SparseSet Workq; @@ -397,7 +397,7 @@ class Prog { int inst_count_[kNumInst]; // count of instructions by opcode PODArray<Inst> inst_; // pointer to instruction array - uint8_t* onepass_nodes_; // data for OnePass nodes + PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes int64_t dfa_mem_; // Maximum memory for DFAs. DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch @@ -283,7 +283,7 @@ static int Fanout(Prog* prog, std::map<int, int>* histogram) { for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) { // TODO(junyer): Optimise this? int bucket = 0; - while (1 << bucket < i->second) { + while (1 << bucket < i->value()) { bucket++; } (*histogram)[bucket]++; @@ -696,7 +696,7 @@ class RE2 { }; // Returns the options set in the constructor. - const Options& options() const { return options_; }; + const Options& options() const { return options_; } // Argument converters; see below. static inline Arg CRadix(short* x); @@ -799,22 +799,22 @@ class RE2::Arg { Arg(type* p) : arg_(p), parser_(name) {} \ Arg(type* p, Parser parser) : arg_(p), parser_(parser) {} - MAKE_PARSER(char, parse_char); - MAKE_PARSER(signed char, parse_schar); - MAKE_PARSER(unsigned char, parse_uchar); - MAKE_PARSER(float, parse_float); - MAKE_PARSER(double, parse_double); - MAKE_PARSER(string, parse_string); - MAKE_PARSER(StringPiece, parse_stringpiece); - - MAKE_PARSER(short, parse_short); - MAKE_PARSER(unsigned short, parse_ushort); - MAKE_PARSER(int, parse_int); - MAKE_PARSER(unsigned int, parse_uint); - MAKE_PARSER(long, parse_long); - MAKE_PARSER(unsigned long, parse_ulong); - MAKE_PARSER(long long, parse_longlong); - MAKE_PARSER(unsigned long long, parse_ulonglong); + MAKE_PARSER(char, parse_char) + MAKE_PARSER(signed char, parse_schar) + MAKE_PARSER(unsigned char, parse_uchar) + MAKE_PARSER(float, parse_float) + MAKE_PARSER(double, parse_double) + MAKE_PARSER(string, parse_string) + MAKE_PARSER(StringPiece, parse_stringpiece) + + MAKE_PARSER(short, parse_short) + MAKE_PARSER(unsigned short, parse_ushort) + MAKE_PARSER(int, parse_int) + MAKE_PARSER(unsigned int, parse_uint) + MAKE_PARSER(long, parse_long) + MAKE_PARSER(unsigned long, parse_ulong) + MAKE_PARSER(long long, parse_longlong) + MAKE_PARSER(unsigned long long, parse_ulonglong) #undef MAKE_PARSER @@ -849,16 +849,16 @@ class RE2::Arg { public: \ static bool parse_##name##_hex(const char* str, size_t n, void* dest); \ static bool parse_##name##_octal(const char* str, size_t n, void* dest); \ - static bool parse_##name##_cradix(const char* str, size_t n, void* dest) - - DECLARE_INTEGER_PARSER(short); - DECLARE_INTEGER_PARSER(ushort); - DECLARE_INTEGER_PARSER(int); - DECLARE_INTEGER_PARSER(uint); - DECLARE_INTEGER_PARSER(long); - DECLARE_INTEGER_PARSER(ulong); - DECLARE_INTEGER_PARSER(longlong); - DECLARE_INTEGER_PARSER(ulonglong); + static bool parse_##name##_cradix(const char* str, size_t n, void* dest); + + DECLARE_INTEGER_PARSER(short) + DECLARE_INTEGER_PARSER(ushort) + DECLARE_INTEGER_PARSER(int) + DECLARE_INTEGER_PARSER(uint) + DECLARE_INTEGER_PARSER(long) + DECLARE_INTEGER_PARSER(ulong) + DECLARE_INTEGER_PARSER(longlong) + DECLARE_INTEGER_PARSER(ulonglong) #undef DECLARE_INTEGER_PARSER diff --git a/re2/stringpiece.h b/re2/stringpiece.h index 1db7a2a..1d9c2d3 100644 --- a/re2/stringpiece.h +++ b/re2/stringpiece.h @@ -19,12 +19,20 @@ // // Arghh! I wish C++ literals were "string". +// Doing this simplifies the logic below. +#ifndef __has_include +#define __has_include(x) 0 +#endif + #include <stddef.h> #include <string.h> #include <algorithm> #include <iosfwd> #include <iterator> #include <string> +#if __has_include(<string_view>) && __cplusplus >= 201703L +#include <string_view> +#endif namespace re2 { @@ -49,6 +57,10 @@ class StringPiece { // expected. StringPiece() : data_(NULL), size_(0) {} +#if __has_include(<string_view>) && __cplusplus >= 201703L + StringPiece(const std::string_view& str) + : data_(str.data()), size_(str.size()) {} +#endif StringPiece(const std::string& str) : data_(str.data()), size_(str.size()) {} StringPiece(const char* str) diff --git a/util/sparse_array.h b/util/sparse_array.h index 86d4d50..c81c9f3 100644 --- a/util/sparse_array.h +++ b/util/sparse_array.h @@ -55,7 +55,7 @@ // IMPLEMENTATION // -// SparseArray is an array dense_ and an array sparse_, both of size max_size_. +// SparseArray is an array dense_ and an array sparse_ of identical size. // At any point, the number of elements in the sparse array is size_. // // The array dense_ contains the size_ elements in the sparse array (with @@ -80,15 +80,11 @@ // To insert a new entry, set sparse_[i] to size_, // initialize dense_[size_], and then increment size_. // -// Deletion of specific values from the array is implemented by -// swapping dense_[size_-1] and the dense_ being deleted and then -// updating the appropriate sparse_ entries. -// // To make the sparse array as efficient as possible for non-primitive types, // elements may or may not be destroyed when they are deleted from the sparse -// array through a call to erase(), erase_existing() or resize(). They -// immediately become inaccessible, but they are only guaranteed to be -// destroyed when the SparseArray destructor is called. +// array through a call to resize(). They immediately become inaccessible, but +// they are only guaranteed to be destroyed when the SparseArray destructor is +// called. // // A moved-from SparseArray will be empty. @@ -99,15 +95,15 @@ #include <assert.h> #include <stdint.h> -#include <string.h> #if __has_feature(memory_sanitizer) #include <sanitizer/msan_interface.h> #endif #include <algorithm> #include <memory> -#include <type_traits> #include <utility> +#include "util/pod_array.h" + namespace re2 { template<typename Value> @@ -119,20 +115,15 @@ class SparseArray { // IndexValue pairs: exposed in SparseArray::iterator. class IndexValue; - static_assert(std::is_trivially_destructible<IndexValue>::value, - "IndexValue must be trivially destructible"); - typedef IndexValue value_type; typedef IndexValue* iterator; typedef const IndexValue* const_iterator; SparseArray(const SparseArray& src); - SparseArray(SparseArray&& src) /*noexcept*/; + SparseArray(SparseArray&& src); SparseArray& operator=(const SparseArray& src); - SparseArray& operator=(SparseArray&& src) /*noexcept*/; - - const IndexValue& iv(int i) const; + SparseArray& operator=(SparseArray&& src); // Return the number of entries in the array. int size() const { @@ -146,27 +137,30 @@ class SparseArray { // Iterate over the array. iterator begin() { - return dense_.get(); + return dense_.data(); } iterator end() { - return dense_.get() + size_; + return dense_.data() + size_; } const_iterator begin() const { - return dense_.get(); + return dense_.data(); } const_iterator end() const { - return dense_.get() + size_; + return dense_.data() + size_; } // Change the maximum size of the array. // Invalidates all iterators. - void resize(int max_size); + void resize(int new_max_size); // Return the maximum size of the array. // Indices can be in the range [0, max_size). int max_size() const { - return max_size_; + if (dense_.data() != NULL) + return dense_.size(); + else + return 0; } // Clear the array. @@ -188,122 +182,56 @@ class SparseArray { iterator set(int i, const Value& v) { return SetInternal(true, i, v); } - iterator set(int i, Value&& v) { // NOLINT - return SetInternal(true, i, std::move(v)); - } - - std::pair<iterator, bool> insert(const value_type& v) { - return InsertInternal(v); - } - std::pair<iterator, bool> insert(value_type&& v) { // NOLINT - return InsertInternal(std::move(v)); - } - - template <typename... Args> - std::pair<iterator, bool> emplace(Args&&... args) { // NOLINT - return InsertInternal(value_type(std::forward<Args>(args)...)); - } - - iterator find(int i) { - if (has_index(i)) - return dense_.get() + sparse_[i]; - return end(); - } - const_iterator find(int i) const { - if (has_index(i)) - return dense_.get() + sparse_[i]; - return end(); + // Set the value at new index i to v. + // Fast but unsafe: only use if has_index(i) is false. + iterator set_new(int i, const Value& v) { + return SetInternal(false, i, v); } - // Change the value at index i to v. + // Set the value at index i to v. // Fast but unsafe: only use if has_index(i) is true. iterator set_existing(int i, const Value& v) { return SetExistingInternal(i, v); } - iterator set_existing(int i, Value&& v) { // NOLINT - return SetExistingInternal(i, std::move(v)); - } - // Set the value at the new index i to v. - // Fast but unsafe: only use if has_index(i) is false. - iterator set_new(int i, const Value& v) { - return SetInternal(false, i, v); + // Get the value at index i. + // Fast but unsafe: only use if has_index(i) is true. + Value& get_existing(int i) { + assert(has_index(i)); + return dense_[sparse_[i]].value_; } - iterator set_new(int i, Value&& v) { // NOLINT - return SetInternal(false, i, std::move(v)); + const Value& get_existing(int i) const { + assert(has_index(i)); + return dense_[sparse_[i]].value_; } - // Get the value at index i from the array.. - // Fast but unsafe: only use if has_index(i) is true. - const Value& get_existing(int i) const; - - // Erasing items from the array during iteration is in general - // NOT safe. There is one special case, which is that the current - // index-value pair can be erased as long as the iterator is then - // checked for being at the end before being incremented. - // For example: - // - // for (i = m.begin(); i != m.end(); ++i) { - // if (ShouldErase(i->index(), i->value())) { - // m.erase(i->index()); - // --i; - // } - // } - // - // Except in the specific case just described, elements must - // not be erased from the array (including clearing the array) - // while iterators are walking over the array. Otherwise, - // the iterators could walk past the end of the array. - - // Erases the element at index i from the array. - void erase(int i); - - // Erases the element at index i from the array. - // Fast but unsafe: only use if has_index(i) is true. - void erase_existing(int i); - private: - template <typename U> - std::pair<iterator, bool> InsertInternal(U&& v) { - DebugCheckInvariants(); - std::pair<iterator, bool> p; - if (has_index(v.index_)) { - p = {dense_.get() + sparse_[v.index_], false}; - } else { - p = {set_new(std::forward<U>(v).index_, std::forward<U>(v).second), true}; - } + iterator SetInternal(bool allow_existing, int i, const Value& v) { DebugCheckInvariants(); - return p; - } - - template <typename U> - iterator SetInternal(bool allow_overwrite, int i, U&& v) { // NOLINT - DebugCheckInvariants(); - if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { assert(false && "illegal index"); // Semantically, end() would be better here, but we already know // the user did something stupid, so begin() insulates them from // dereferencing an invalid pointer. return begin(); } - if (!allow_overwrite) { + if (!allow_existing) { assert(!has_index(i)); create_index(i); } else { if (!has_index(i)) create_index(i); } - return set_existing(i, std::forward<U>(v)); // NOLINT + return SetExistingInternal(i, v); } - template <typename U> - iterator SetExistingInternal(int i, U&& v) { // NOLINT + iterator SetExistingInternal(int i, const Value& v) { DebugCheckInvariants(); assert(has_index(i)); - dense_[sparse_[i]].value() = std::forward<U>(v); + dense_[sparse_[i]].value_ = v; DebugCheckInvariants(); - return dense_.get() + sparse_[i]; + return dense_.data() + sparse_[i]; } // Add the index i to the array. @@ -321,7 +249,7 @@ class SparseArray { // Initializes memory for elements [min, max). void MaybeInitializeMemory(int min, int max) { #if __has_feature(memory_sanitizer) - __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]); + __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]); #elif defined(RE2_ON_VALGRIND) for (int i = min; i < max; i++) { sparse_[i] = 0xababababU; @@ -330,9 +258,8 @@ class SparseArray { } int size_ = 0; - int max_size_ = 0; - std::unique_ptr<int[]> sparse_; - std::unique_ptr<IndexValue[]> dense_; + PODArray<int> sparse_; + PODArray<IndexValue> dense_; }; template<typename Value> @@ -341,109 +268,79 @@ SparseArray<Value>::SparseArray() = default; template<typename Value> SparseArray<Value>::SparseArray(const SparseArray& src) : size_(src.size_), - max_size_(src.max_size_), - sparse_(new int[max_size_]), - dense_(new IndexValue[max_size_]) { - std::copy_n(src.sparse_.get(), max_size_, sparse_.get()); - std::copy_n(src.dense_.get(), max_size_, dense_.get()); + sparse_(src.max_size()), + dense_(src.max_size()) { + std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data()); + std::copy_n(src.dense_.data(), src.max_size(), dense_.data()); } template<typename Value> -SparseArray<Value>::SparseArray(SparseArray&& src) /*noexcept*/ // NOLINT +SparseArray<Value>::SparseArray(SparseArray&& src) : size_(src.size_), - max_size_(src.max_size_), sparse_(std::move(src.sparse_)), dense_(std::move(src.dense_)) { src.size_ = 0; - src.max_size_ = 0; } template<typename Value> SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) { + // Construct these first for exception safety. + PODArray<int> a(src.max_size()); + PODArray<IndexValue> b(src.max_size()); + size_ = src.size_; - max_size_ = src.max_size_; - std::unique_ptr<int[]> a(new int[max_size_]); - std::copy_n(src.sparse_.get(), src.max_size_, a.get()); sparse_ = std::move(a); - std::unique_ptr<IndexValue[]> b(new IndexValue[max_size_]); - std::copy_n(src.dense_.get(), src.max_size_, b.get()); dense_ = std::move(b); + std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data()); + std::copy_n(src.dense_.data(), src.max_size(), dense_.data()); return *this; } template<typename Value> -SparseArray<Value>& SparseArray<Value>::operator=( - SparseArray&& src) /*noexcept*/ { // NOLINT +SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) { size_ = src.size_; - max_size_ = src.max_size_; sparse_ = std::move(src.sparse_); dense_ = std::move(src.dense_); - // clear out the source src.size_ = 0; - src.max_size_ = 0; return *this; } // IndexValue pairs: exposed in SparseArray::iterator. template<typename Value> class SparseArray<Value>::IndexValue { - friend class SparseArray; public: - typedef int first_type; - typedef Value second_type; - - IndexValue() {} - IndexValue(int i, const Value& v) : index_(i), second(v) {} - IndexValue(int i, Value&& v) : index_(i), second(std::move(v)) {} - int index() const { return index_; } - - Value& value() /*&*/ { return second; } - const Value& value() const /*&*/ { return second; } - //Value&& value() /*&&*/ { return std::move(second); } // NOLINT + Value& value() { return value_; } + const Value& value() const { return value_; } private: + friend class SparseArray; int index_; - - public: - // Provide the data in the 'second' member so that the utilities - // in map-util work. - // TODO(billydonahue): 'second' is public for short-term compatibility. - // Users will be transitioned to using value() accessor. - Value second; + Value value_; }; -template<typename Value> -const typename SparseArray<Value>::IndexValue& -SparseArray<Value>::iv(int i) const { - assert(i >= 0); - assert(i < size_); - return dense_[i]; -} - // Change the maximum size of the array. // Invalidates all iterators. template<typename Value> -void SparseArray<Value>::resize(int max_size) { +void SparseArray<Value>::resize(int new_max_size) { DebugCheckInvariants(); - if (max_size > max_size_) { - std::unique_ptr<int[]> a(new int[max_size]); - if (sparse_) { - std::copy_n(sparse_.get(), max_size_, a.get()); - } - sparse_ = std::move(a); + if (new_max_size > max_size()) { + const int old_max_size = max_size(); - std::unique_ptr<IndexValue[]> b(new IndexValue[max_size]); - if (dense_) { - std::copy_n(dense_.get(), max_size_, b.get()); - } + // Construct these first for exception safety. + PODArray<int> a(new_max_size); + PODArray<IndexValue> b(new_max_size); + + std::copy_n(sparse_.data(), old_max_size, a.data()); + std::copy_n(dense_.data(), old_max_size, b.data()); + + sparse_ = std::move(a); dense_ = std::move(b); - MaybeInitializeMemory(max_size_, max_size); + MaybeInitializeMemory(old_max_size, new_max_size); } - max_size_ = max_size; - if (size_ > max_size_) - size_ = max_size_; + if (size_ > new_max_size) + size_ = new_max_size; DebugCheckInvariants(); } @@ -451,8 +348,8 @@ void SparseArray<Value>::resize(int max_size) { template<typename Value> bool SparseArray<Value>::has_index(int i) const { assert(i >= 0); - assert(i < max_size_); - if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + assert(i < max_size()); + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { return false; } // Unsigned comparison avoids checking sparse_[i] < 0. @@ -461,47 +358,17 @@ bool SparseArray<Value>::has_index(int i) const { } template<typename Value> -const Value& SparseArray<Value>::get_existing(int i) const { - assert(has_index(i)); - return dense_[sparse_[i]].second; -} - -template<typename Value> -void SparseArray<Value>::erase(int i) { - DebugCheckInvariants(); - if (has_index(i)) - erase_existing(i); - DebugCheckInvariants(); -} - -template<typename Value> -void SparseArray<Value>::erase_existing(int i) { - DebugCheckInvariants(); - assert(has_index(i)); - int di = sparse_[i]; - if (di < size_ - 1) { - dense_[di] = std::move(dense_[size_ - 1]); - sparse_[dense_[di].index_] = di; - } - size_--; - DebugCheckInvariants(); -} - -template<typename Value> void SparseArray<Value>::create_index(int i) { assert(!has_index(i)); - assert(size_ < max_size_); + assert(size_ < max_size()); sparse_[i] = size_; dense_[size_].index_ = i; size_++; } -template<typename Value> SparseArray<Value>::SparseArray(int max_size) { - sparse_.reset(new int[max_size]); - dense_.reset(new IndexValue[max_size]); - size_ = 0; +template<typename Value> SparseArray<Value>::SparseArray(int max_size) : + sparse_(max_size), dense_(max_size) { MaybeInitializeMemory(size_, max_size); - max_size_ = max_size; DebugCheckInvariants(); } @@ -511,8 +378,7 @@ template<typename Value> SparseArray<Value>::~SparseArray() { template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const { assert(0 <= size_); - assert(size_ <= max_size_); - assert(size_ == 0 || sparse_ != NULL); + assert(size_ <= max_size()); } // Comparison function for sorting. diff --git a/util/sparse_set.h b/util/sparse_set.h index 2efdd28..0d5ad51 100644 --- a/util/sparse_set.h +++ b/util/sparse_set.h @@ -54,7 +54,6 @@ #include <assert.h> #include <stdint.h> -#include <string.h> #if __has_feature(memory_sanitizer) #include <sanitizer/msan_interface.h> #endif @@ -62,6 +61,8 @@ #include <memory> #include <utility> +#include "util/pod_array.h" + namespace re2 { template<typename Value> @@ -86,27 +87,30 @@ class SparseSetT { // Iterate over the set. iterator begin() { - return dense_.get(); + return dense_.data(); } iterator end() { - return dense_.get() + size_; + return dense_.data() + size_; } const_iterator begin() const { - return dense_.get(); + return dense_.data(); } const_iterator end() const { - return dense_.get() + size_; + return dense_.data() + size_; } // Change the maximum size of the set. // Invalidates all iterators. - void resize(int max_size); + void resize(int new_max_size); // Return the maximum size of the set. // Indices can be in the range [0, max_size). int max_size() const { - return max_size_; + if (dense_.data() != NULL) + return dense_.size(); + else + return 0; } // Clear the set. @@ -138,7 +142,7 @@ class SparseSetT { private: iterator InsertInternal(bool allow_existing, int i) { DebugCheckInvariants(); - if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { assert(false && "illegal index"); // Semantically, end() would be better here, but we already know // the user did something stupid, so begin() insulates them from @@ -153,7 +157,7 @@ class SparseSetT { create_index(i); } DebugCheckInvariants(); - return dense_.get() + sparse_[i]; + return dense_.data() + sparse_[i]; } // Add the index i to the set. @@ -170,7 +174,7 @@ class SparseSetT { // Initializes memory for elements [min, max). void MaybeInitializeMemory(int min, int max) { #if __has_feature(memory_sanitizer) - __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]); + __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]); #elif defined(RE2_ON_VALGRIND) for (int i = min; i < max; i++) { sparse_[i] = 0xababababU; @@ -179,9 +183,8 @@ class SparseSetT { } int size_ = 0; - int max_size_ = 0; - std::unique_ptr<int[]> sparse_; - std::unique_ptr<int[]> dense_; + PODArray<int> sparse_; + PODArray<int> dense_; }; template<typename Value> @@ -190,26 +193,25 @@ SparseSetT<Value>::SparseSetT() = default; // Change the maximum size of the set. // Invalidates all iterators. template<typename Value> -void SparseSetT<Value>::resize(int max_size) { +void SparseSetT<Value>::resize(int new_max_size) { DebugCheckInvariants(); - if (max_size > max_size_) { - std::unique_ptr<int[]> a(new int[max_size]); - if (sparse_) { - std::copy_n(sparse_.get(), max_size_, a.get()); - } - sparse_ = std::move(a); + if (new_max_size > max_size()) { + const int old_max_size = max_size(); - std::unique_ptr<int[]> b(new int[max_size]); - if (dense_) { - std::copy_n(dense_.get(), max_size_, b.get()); - } + // Construct these first for exception safety. + PODArray<int> a(new_max_size); + PODArray<int> b(new_max_size); + + std::copy_n(sparse_.data(), old_max_size, a.data()); + std::copy_n(dense_.data(), old_max_size, b.data()); + + sparse_ = std::move(a); dense_ = std::move(b); - MaybeInitializeMemory(max_size_, max_size); + MaybeInitializeMemory(old_max_size, new_max_size); } - max_size_ = max_size; - if (size_ > max_size_) - size_ = max_size_; + if (size_ > new_max_size) + size_ = new_max_size; DebugCheckInvariants(); } @@ -217,8 +219,8 @@ void SparseSetT<Value>::resize(int max_size) { template<typename Value> bool SparseSetT<Value>::contains(int i) const { assert(i >= 0); - assert(i < max_size_); - if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { + assert(i < max_size()); + if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { return false; } // Unsigned comparison avoids checking sparse_[i] < 0. @@ -229,18 +231,15 @@ bool SparseSetT<Value>::contains(int i) const { template<typename Value> void SparseSetT<Value>::create_index(int i) { assert(!contains(i)); - assert(size_ < max_size_); + assert(size_ < max_size()); sparse_[i] = size_; dense_[size_] = i; size_++; } -template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) { - sparse_.reset(new int[max_size]); - dense_.reset(new int[max_size]); - size_ = 0; +template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) : + sparse_(max_size), dense_(max_size) { MaybeInitializeMemory(size_, max_size); - max_size_ = max_size; DebugCheckInvariants(); } @@ -250,8 +249,7 @@ template<typename Value> SparseSetT<Value>::~SparseSetT() { template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const { assert(0 <= size_); - assert(size_ <= max_size_); - assert(size_ == 0 || sparse_ != NULL); + assert(size_ <= max_size()); } // Comparison function for sorting. |