summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2019-09-25 15:35:57 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2019-09-25 15:35:57 +0900
commitde99325ffe35c346d71d37e5f60b67e29313c1f2 (patch)
tree937eb9611b8c5952744ba895f711b70b0c565b58
parent07d538b47cc1d986c87b748fe5961beb1e8d2fbd (diff)
downloadre2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.gz
re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.bz2
re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.zip
Imported Upstream version 20190301upstream/20190301
-rw-r--r--.travis.yml11
-rw-r--r--README2
-rwxr-xr-xkokoro/bazel.sh26
-rwxr-xr-xkokoro/cmake.sh25
-rwxr-xr-xkokoro/macos-bazel.sh26
-rwxr-xr-xkokoro/macos-cmake.sh14
-rwxr-xr-xkokoro/ubuntu-bazel.sh26
-rwxr-xr-xkokoro/windows-bazel.bat32
-rwxr-xr-xkokoro/windows-cmake.bat20
-rw-r--r--re2/fuzzing/re2_fuzzer.cc18
-rw-r--r--re2/nfa.cc15
-rw-r--r--re2/onepass.cc8
-rw-r--r--re2/parse.cc18
-rw-r--r--re2/prog.cc2
-rw-r--r--re2/prog.h2
-rw-r--r--re2/re2.cc2
-rw-r--r--re2/re2.h54
-rw-r--r--re2/stringpiece.h12
-rw-r--r--util/sparse_array.h286
-rw-r--r--util/sparse_set.h74
20 files changed, 258 insertions, 415 deletions
diff --git a/.travis.yml b/.travis.yml
index 674cda5..0ddc4a9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -154,6 +154,17 @@ matrix:
- clang-7
env:
- MATRIX_EVAL="CC=clang-7 CXX=clang++-7"
+ - os: linux
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main'
+ key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
+ packages:
+ - clang-8
+ env:
+ - MATRIX_EVAL="CC=clang-8 CXX=clang++-8"
before_install:
- eval "${MATRIX_EVAL}"
diff --git a/README b/README
index 681d6d7..d1ef431 100644
--- a/README
+++ b/README
@@ -28,7 +28,7 @@ under the BSD-style license found in the LICENSE file.
RE2's native language is C++.
A C wrapper is at https://github.com/marcomaggi/cre2/.
-An Erlang wrapper is at https://github.com/tuncer/re2/ and on Hex (hex.pm).
+An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
diff --git a/kokoro/bazel.sh b/kokoro/bazel.sh
new file mode 100755
index 0000000..6f25982
--- /dev/null
+++ b/kokoro/bazel.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -eux
+
+cd git/re2
+
+bazel clean
+bazel build --compilation_mode=dbg -- //...
+bazel test --compilation_mode=dbg --test_output=errors -- //... \
+ -//:dfa_test \
+ -//:exhaustive1_test \
+ -//:exhaustive2_test \
+ -//:exhaustive3_test \
+ -//:exhaustive_test \
+ -//:random_test
+
+bazel clean
+bazel build --compilation_mode=opt -- //...
+bazel test --compilation_mode=opt --test_output=errors -- //... \
+ -//:dfa_test \
+ -//:exhaustive1_test \
+ -//:exhaustive2_test \
+ -//:exhaustive3_test \
+ -//:exhaustive_test \
+ -//:random_test
+
+exit 0
diff --git a/kokoro/cmake.sh b/kokoro/cmake.sh
new file mode 100755
index 0000000..999fbfe
--- /dev/null
+++ b/kokoro/cmake.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -eux
+
+cd git/re2
+
+case "${KOKORO_JOB_NAME}" in
+ */windows-*)
+ CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64')
+ ;;
+ *)
+ CMAKE_G_A_FLAGS=()
+ # Work around a bug in older versions of bash. :/
+ set +u
+ ;;
+esac
+
+cmake -D CMAKE_BUILD_TYPE=Debug "${CMAKE_G_A_FLAGS[@]}" .
+cmake --build . --config Debug --clean-first
+ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
+
+cmake -D CMAKE_BUILD_TYPE=Release "${CMAKE_G_A_FLAGS[@]}" .
+cmake --build . --config Release --clean-first
+ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
+
+exit 0
diff --git a/kokoro/macos-bazel.sh b/kokoro/macos-bazel.sh
index 6f25982..e43c852 100755
--- a/kokoro/macos-bazel.sh
+++ b/kokoro/macos-bazel.sh
@@ -1,26 +1,4 @@
#!/bin/bash
set -eux
-
-cd git/re2
-
-bazel clean
-bazel build --compilation_mode=dbg -- //...
-bazel test --compilation_mode=dbg --test_output=errors -- //... \
- -//:dfa_test \
- -//:exhaustive1_test \
- -//:exhaustive2_test \
- -//:exhaustive3_test \
- -//:exhaustive_test \
- -//:random_test
-
-bazel clean
-bazel build --compilation_mode=opt -- //...
-bazel test --compilation_mode=opt --test_output=errors -- //... \
- -//:dfa_test \
- -//:exhaustive1_test \
- -//:exhaustive2_test \
- -//:exhaustive3_test \
- -//:exhaustive_test \
- -//:random_test
-
-exit 0
+bash git/re2/kokoro/bazel.sh
+exit $?
diff --git a/kokoro/macos-cmake.sh b/kokoro/macos-cmake.sh
index dbe94cb..ef4b7dc 100755
--- a/kokoro/macos-cmake.sh
+++ b/kokoro/macos-cmake.sh
@@ -1,14 +1,4 @@
#!/bin/bash
set -eux
-
-cd git/re2
-
-cmake -D CMAKE_BUILD_TYPE=Debug .
-cmake --build . --config Debug --clean-first
-ctest -C Debug --output-on-failure -E dfa\|exhaustive\|random
-
-cmake -D CMAKE_BUILD_TYPE=Release .
-cmake --build . --config Release --clean-first
-ctest -C Release --output-on-failure -E dfa\|exhaustive\|random
-
-exit 0
+bash git/re2/kokoro/cmake.sh
+exit $?
diff --git a/kokoro/ubuntu-bazel.sh b/kokoro/ubuntu-bazel.sh
index 6f25982..e43c852 100755
--- a/kokoro/ubuntu-bazel.sh
+++ b/kokoro/ubuntu-bazel.sh
@@ -1,26 +1,4 @@
#!/bin/bash
set -eux
-
-cd git/re2
-
-bazel clean
-bazel build --compilation_mode=dbg -- //...
-bazel test --compilation_mode=dbg --test_output=errors -- //... \
- -//:dfa_test \
- -//:exhaustive1_test \
- -//:exhaustive2_test \
- -//:exhaustive3_test \
- -//:exhaustive_test \
- -//:random_test
-
-bazel clean
-bazel build --compilation_mode=opt -- //...
-bazel test --compilation_mode=opt --test_output=errors -- //... \
- -//:dfa_test \
- -//:exhaustive1_test \
- -//:exhaustive2_test \
- -//:exhaustive3_test \
- -//:exhaustive_test \
- -//:random_test
-
-exit 0
+bash git/re2/kokoro/bazel.sh
+exit $?
diff --git a/kokoro/windows-bazel.bat b/kokoro/windows-bazel.bat
index 9ac3d5b..283f8d2 100755
--- a/kokoro/windows-bazel.bat
+++ b/kokoro/windows-bazel.bat
@@ -1,30 +1,2 @@
-CD git/re2 ^
- || EXIT /B 1
-
-bazel clean ^
- || EXIT /B 1
-bazel build --compilation_mode=dbg -- //... ^
- || EXIT /B 1
-bazel test --compilation_mode=dbg --test_output=errors -- //... ^
- -//:dfa_test ^
- -//:exhaustive1_test ^
- -//:exhaustive2_test ^
- -//:exhaustive3_test ^
- -//:exhaustive_test ^
- -//:random_test ^
- || EXIT /B 1
-
-bazel clean ^
- || EXIT /B 1
-bazel build --compilation_mode=opt -- //... ^
- || EXIT /B 1
-bazel test --compilation_mode=opt --test_output=errors -- //... ^
- -//:dfa_test ^
- -//:exhaustive1_test ^
- -//:exhaustive2_test ^
- -//:exhaustive3_test ^
- -//:exhaustive_test ^
- -//:random_test ^
- || EXIT /B 1
-
-EXIT /B 0
+bash git/re2/kokoro/bazel.sh
+EXIT /B %ERRORLEVEL%
diff --git a/kokoro/windows-cmake.bat b/kokoro/windows-cmake.bat
index e926d88..77a4db9 100755
--- a/kokoro/windows-cmake.bat
+++ b/kokoro/windows-cmake.bat
@@ -1,18 +1,2 @@
-CD git/re2 ^
- || EXIT /B 1
-
-cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 14 2015" -A x64 . ^
- || EXIT /B 1
-cmake --build . --config Debug --clean-first ^
- || EXIT /B 1
-ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random ^
- || EXIT /B 1
-
-cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 14 2015" -A x64 . ^
- || EXIT /B 1
-cmake --build . --config Release --clean-first ^
- || EXIT /B 1
-ctest -C Release --output-on-failure -E dfa^|exhaustive^|random ^
- || EXIT /B 1
-
-EXIT /B 0
+bash git/re2/kokoro/cmake.sh
+EXIT /B %ERRORLEVEL%
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 084b5c0..5e5d324 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -106,26 +106,32 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size == 0 || size > 999)
return 0;
- // Crudely limit the use of ., \p and \P.
- // Otherwise, we will waste time on inputs that have long runs of Unicode
+ // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
+ // Otherwise, we will waste time on inputs that have long runs of various
// character classes. The fuzzer has shown itself to be easily capable of
// generating such patterns that fall within the other limits, but result
// in timeouts nonetheless. The marginal cost is high - even more so when
// counted repetition is involved - whereas the marginal benefit is zero.
- int dot = 0;
- int backslash_p = 0;
+ // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
+ int char_class = 0;
+ int backslash_p = 0; // very expensive, so handle specially
for (size_t i = 0; i < size; i++) {
if (data[i] == '.')
- dot++;
+ char_class++;
if (data[i] != '\\')
continue;
i++;
if (i >= size)
break;
+ if (data[i] == 'p' || data[i] == 'P' ||
+ data[i] == 'd' || data[i] == 'D' ||
+ data[i] == 's' || data[i] == 'S' ||
+ data[i] == 'w' || data[i] == 'W')
+ char_class++;
if (data[i] == 'p' || data[i] == 'P')
backslash_p++;
}
- if (dot > 99)
+ if (char_class > 9)
return 0;
if (backslash_p > 1)
return 0;
diff --git a/re2/nfa.cc b/re2/nfa.cc
index c8c49a9..04d4c6f 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -238,8 +238,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
// or we might not. Even if not, it is necessary to have it,
// so that we don't revisit id0 during the recursion.
q->set_new(id, NULL);
-
- Thread** tp = &q->find(id)->second;
+ Thread** tp = &q->get_existing(id);
int j;
Thread* t;
Prog::Inst* ip = prog_->inst(id);
@@ -329,7 +328,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
nextq->clear();
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
- Thread* t = i->second;
+ Thread* t = i->value();
if (t == NULL)
continue;
@@ -364,7 +363,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
Decref(t);
for (++i; i != runq->end(); ++i)
- Decref(i->second);
+ Decref(i->value());
runq->clear();
if (ip->greedy(prog_))
return ip->out1();
@@ -403,7 +402,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
// rest of the current Threadq.
Decref(t);
for (++i; i != runq->end(); ++i)
- Decref(i->second);
+ Decref(i->value());
runq->clear();
return 0;
}
@@ -505,7 +504,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
fprintf(stderr, "%c:", c);
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
- Thread* t = i->second;
+ Thread* t = i->value();
if (t == NULL)
continue;
fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
@@ -586,7 +585,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
}
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
- Decref(i->second);
+ Decref(i->value());
if (matched_) {
for (int i = 0; i < nsubmatch; i++)
@@ -697,7 +696,7 @@ void Prog::Fanout(SparseArray<int>* fanout) {
fanout->clear();
fanout->set_new(start(), 0);
for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
- int* count = &i->second;
+ int* count = &i->value();
reachable.clear();
reachable.insert(i->index());
for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 7d39290..edd2c48 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -244,7 +244,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
if (anchor_end())
kind = kFullMatch;
- uint8_t* nodes = onepass_nodes_;
+ uint8_t* nodes = onepass_nodes_.data();
int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
// start() is always mapped to the zeroth OneState.
OneState* state = IndexToNode(nodes, statesize, 0);
@@ -383,7 +383,7 @@ struct InstCond {
// Constructs and saves corresponding one-pass NFA on success.
bool Prog::IsOnePass() {
if (did_onepass_)
- return onepass_nodes_ != NULL;
+ return onepass_nodes_.data() != NULL;
did_onepass_ = true;
if (start() == 0) // no match
@@ -612,8 +612,8 @@ bool Prog::IsOnePass() {
}
dfa_mem_ -= nalloc*statesize;
- onepass_nodes_ = new uint8_t[nalloc*statesize];
- memmove(onepass_nodes_, nodes.data(), nalloc*statesize);
+ onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize);
+ memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize);
return true;
fail:
diff --git a/re2/parse.cc b/re2/parse.cc
index 354eafe..c8dea7e 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -900,7 +900,7 @@ struct Frame {
int nsub;
int round;
std::vector<Splice> splices;
- std::vector<Splice>::iterator spliceiter;
+ int spliceidx;
};
// Bundled into a class for friend access to Regexp without needing to declare
@@ -938,15 +938,15 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
auto& nsub = stk.back().nsub;
auto& round = stk.back().round;
auto& splices = stk.back().splices;
- auto& spliceiter = stk.back().spliceiter;
+ auto& spliceidx = stk.back().spliceidx;
if (splices.empty()) {
// Advance to the next round of factoring. Note that this covers
// the initialised state: when splices is empty and round is 0.
round++;
- } else if (spliceiter != splices.end()) {
+ } else if (spliceidx < static_cast<int>(splices.size())) {
// We have at least one more Splice to factor. Recurse logically.
- stk.emplace_back(spliceiter->sub, spliceiter->nsub);
+ stk.emplace_back(splices[spliceidx].sub, splices[spliceidx].nsub);
continue;
} else {
// We have no more Splices to factor. Apply them.
@@ -1007,8 +1007,8 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
// (Note that references will be invalidated!)
int nsuffix = nsub;
stk.pop_back();
- stk.back().spliceiter->nsuffix = nsuffix;
- ++stk.back().spliceiter;
+ stk.back().splices[stk.back().spliceidx].nsuffix = nsuffix;
+ ++stk.back().spliceidx;
continue;
}
default:
@@ -1016,11 +1016,11 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
break;
}
- // Set spliceiter depending on whether we have Splices to factor.
+ // Set spliceidx depending on whether we have Splices to factor.
if (splices.empty() || round == 3) {
- spliceiter = splices.end();
+ spliceidx = static_cast<int>(splices.size());
} else {
- spliceiter = splices.begin();
+ spliceidx = 0;
}
}
}
diff --git a/re2/prog.cc b/re2/prog.cc
index fa03af9..9729aa4 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -112,7 +112,6 @@ Prog::Prog()
first_byte_(-1),
flags_(0),
list_count_(0),
- onepass_nodes_(NULL),
dfa_mem_(0),
dfa_first_(NULL),
dfa_longest_(NULL) {
@@ -121,7 +120,6 @@ Prog::Prog()
Prog::~Prog() {
DeleteDFA(dfa_longest_);
DeleteDFA(dfa_first_);
- delete[] onepass_nodes_;
}
typedef SparseSet Workq;
diff --git a/re2/prog.h b/re2/prog.h
index 268ab9d..edac935 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -397,7 +397,7 @@ class Prog {
int inst_count_[kNumInst]; // count of instructions by opcode
PODArray<Inst> inst_; // pointer to instruction array
- uint8_t* onepass_nodes_; // data for OnePass nodes
+ PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
int64_t dfa_mem_; // Maximum memory for DFAs.
DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
diff --git a/re2/re2.cc b/re2/re2.cc
index 4e42f8b..1529807 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -283,7 +283,7 @@ static int Fanout(Prog* prog, std::map<int, int>* histogram) {
for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
// TODO(junyer): Optimise this?
int bucket = 0;
- while (1 << bucket < i->second) {
+ while (1 << bucket < i->value()) {
bucket++;
}
(*histogram)[bucket]++;
diff --git a/re2/re2.h b/re2/re2.h
index f846029..216347d 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -696,7 +696,7 @@ class RE2 {
};
// Returns the options set in the constructor.
- const Options& options() const { return options_; };
+ const Options& options() const { return options_; }
// Argument converters; see below.
static inline Arg CRadix(short* x);
@@ -799,22 +799,22 @@ class RE2::Arg {
Arg(type* p) : arg_(p), parser_(name) {} \
Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
- MAKE_PARSER(char, parse_char);
- MAKE_PARSER(signed char, parse_schar);
- MAKE_PARSER(unsigned char, parse_uchar);
- MAKE_PARSER(float, parse_float);
- MAKE_PARSER(double, parse_double);
- MAKE_PARSER(string, parse_string);
- MAKE_PARSER(StringPiece, parse_stringpiece);
-
- MAKE_PARSER(short, parse_short);
- MAKE_PARSER(unsigned short, parse_ushort);
- MAKE_PARSER(int, parse_int);
- MAKE_PARSER(unsigned int, parse_uint);
- MAKE_PARSER(long, parse_long);
- MAKE_PARSER(unsigned long, parse_ulong);
- MAKE_PARSER(long long, parse_longlong);
- MAKE_PARSER(unsigned long long, parse_ulonglong);
+ MAKE_PARSER(char, parse_char)
+ MAKE_PARSER(signed char, parse_schar)
+ MAKE_PARSER(unsigned char, parse_uchar)
+ MAKE_PARSER(float, parse_float)
+ MAKE_PARSER(double, parse_double)
+ MAKE_PARSER(string, parse_string)
+ MAKE_PARSER(StringPiece, parse_stringpiece)
+
+ MAKE_PARSER(short, parse_short)
+ MAKE_PARSER(unsigned short, parse_ushort)
+ MAKE_PARSER(int, parse_int)
+ MAKE_PARSER(unsigned int, parse_uint)
+ MAKE_PARSER(long, parse_long)
+ MAKE_PARSER(unsigned long, parse_ulong)
+ MAKE_PARSER(long long, parse_longlong)
+ MAKE_PARSER(unsigned long long, parse_ulonglong)
#undef MAKE_PARSER
@@ -849,16 +849,16 @@ class RE2::Arg {
public: \
static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
- static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
-
- DECLARE_INTEGER_PARSER(short);
- DECLARE_INTEGER_PARSER(ushort);
- DECLARE_INTEGER_PARSER(int);
- DECLARE_INTEGER_PARSER(uint);
- DECLARE_INTEGER_PARSER(long);
- DECLARE_INTEGER_PARSER(ulong);
- DECLARE_INTEGER_PARSER(longlong);
- DECLARE_INTEGER_PARSER(ulonglong);
+ static bool parse_##name##_cradix(const char* str, size_t n, void* dest);
+
+ DECLARE_INTEGER_PARSER(short)
+ DECLARE_INTEGER_PARSER(ushort)
+ DECLARE_INTEGER_PARSER(int)
+ DECLARE_INTEGER_PARSER(uint)
+ DECLARE_INTEGER_PARSER(long)
+ DECLARE_INTEGER_PARSER(ulong)
+ DECLARE_INTEGER_PARSER(longlong)
+ DECLARE_INTEGER_PARSER(ulonglong)
#undef DECLARE_INTEGER_PARSER
diff --git a/re2/stringpiece.h b/re2/stringpiece.h
index 1db7a2a..1d9c2d3 100644
--- a/re2/stringpiece.h
+++ b/re2/stringpiece.h
@@ -19,12 +19,20 @@
//
// Arghh! I wish C++ literals were "string".
+// Doing this simplifies the logic below.
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
#include <stddef.h>
#include <string.h>
#include <algorithm>
#include <iosfwd>
#include <iterator>
#include <string>
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+#include <string_view>
+#endif
namespace re2 {
@@ -49,6 +57,10 @@ class StringPiece {
// expected.
StringPiece()
: data_(NULL), size_(0) {}
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+ StringPiece(const std::string_view& str)
+ : data_(str.data()), size_(str.size()) {}
+#endif
StringPiece(const std::string& str)
: data_(str.data()), size_(str.size()) {}
StringPiece(const char* str)
diff --git a/util/sparse_array.h b/util/sparse_array.h
index 86d4d50..c81c9f3 100644
--- a/util/sparse_array.h
+++ b/util/sparse_array.h
@@ -55,7 +55,7 @@
// IMPLEMENTATION
//
-// SparseArray is an array dense_ and an array sparse_, both of size max_size_.
+// SparseArray is an array dense_ and an array sparse_ of identical size.
// At any point, the number of elements in the sparse array is size_.
//
// The array dense_ contains the size_ elements in the sparse array (with
@@ -80,15 +80,11 @@
// To insert a new entry, set sparse_[i] to size_,
// initialize dense_[size_], and then increment size_.
//
-// Deletion of specific values from the array is implemented by
-// swapping dense_[size_-1] and the dense_ being deleted and then
-// updating the appropriate sparse_ entries.
-//
// To make the sparse array as efficient as possible for non-primitive types,
// elements may or may not be destroyed when they are deleted from the sparse
-// array through a call to erase(), erase_existing() or resize(). They
-// immediately become inaccessible, but they are only guaranteed to be
-// destroyed when the SparseArray destructor is called.
+// array through a call to resize(). They immediately become inaccessible, but
+// they are only guaranteed to be destroyed when the SparseArray destructor is
+// called.
//
// A moved-from SparseArray will be empty.
@@ -99,15 +95,15 @@
#include <assert.h>
#include <stdint.h>
-#include <string.h>
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#include <algorithm>
#include <memory>
-#include <type_traits>
#include <utility>
+#include "util/pod_array.h"
+
namespace re2 {
template<typename Value>
@@ -119,20 +115,15 @@ class SparseArray {
// IndexValue pairs: exposed in SparseArray::iterator.
class IndexValue;
- static_assert(std::is_trivially_destructible<IndexValue>::value,
- "IndexValue must be trivially destructible");
- typedef IndexValue value_type;
typedef IndexValue* iterator;
typedef const IndexValue* const_iterator;
SparseArray(const SparseArray& src);
- SparseArray(SparseArray&& src) /*noexcept*/;
+ SparseArray(SparseArray&& src);
SparseArray& operator=(const SparseArray& src);
- SparseArray& operator=(SparseArray&& src) /*noexcept*/;
-
- const IndexValue& iv(int i) const;
+ SparseArray& operator=(SparseArray&& src);
// Return the number of entries in the array.
int size() const {
@@ -146,27 +137,30 @@ class SparseArray {
// Iterate over the array.
iterator begin() {
- return dense_.get();
+ return dense_.data();
}
iterator end() {
- return dense_.get() + size_;
+ return dense_.data() + size_;
}
const_iterator begin() const {
- return dense_.get();
+ return dense_.data();
}
const_iterator end() const {
- return dense_.get() + size_;
+ return dense_.data() + size_;
}
// Change the maximum size of the array.
// Invalidates all iterators.
- void resize(int max_size);
+ void resize(int new_max_size);
// Return the maximum size of the array.
// Indices can be in the range [0, max_size).
int max_size() const {
- return max_size_;
+ if (dense_.data() != NULL)
+ return dense_.size();
+ else
+ return 0;
}
// Clear the array.
@@ -188,122 +182,56 @@ class SparseArray {
iterator set(int i, const Value& v) {
return SetInternal(true, i, v);
}
- iterator set(int i, Value&& v) { // NOLINT
- return SetInternal(true, i, std::move(v));
- }
-
- std::pair<iterator, bool> insert(const value_type& v) {
- return InsertInternal(v);
- }
- std::pair<iterator, bool> insert(value_type&& v) { // NOLINT
- return InsertInternal(std::move(v));
- }
-
- template <typename... Args>
- std::pair<iterator, bool> emplace(Args&&... args) { // NOLINT
- return InsertInternal(value_type(std::forward<Args>(args)...));
- }
-
- iterator find(int i) {
- if (has_index(i))
- return dense_.get() + sparse_[i];
- return end();
- }
- const_iterator find(int i) const {
- if (has_index(i))
- return dense_.get() + sparse_[i];
- return end();
+ // Set the value at new index i to v.
+ // Fast but unsafe: only use if has_index(i) is false.
+ iterator set_new(int i, const Value& v) {
+ return SetInternal(false, i, v);
}
- // Change the value at index i to v.
+ // Set the value at index i to v.
// Fast but unsafe: only use if has_index(i) is true.
iterator set_existing(int i, const Value& v) {
return SetExistingInternal(i, v);
}
- iterator set_existing(int i, Value&& v) { // NOLINT
- return SetExistingInternal(i, std::move(v));
- }
- // Set the value at the new index i to v.
- // Fast but unsafe: only use if has_index(i) is false.
- iterator set_new(int i, const Value& v) {
- return SetInternal(false, i, v);
+ // Get the value at index i.
+ // Fast but unsafe: only use if has_index(i) is true.
+ Value& get_existing(int i) {
+ assert(has_index(i));
+ return dense_[sparse_[i]].value_;
}
- iterator set_new(int i, Value&& v) { // NOLINT
- return SetInternal(false, i, std::move(v));
+ const Value& get_existing(int i) const {
+ assert(has_index(i));
+ return dense_[sparse_[i]].value_;
}
- // Get the value at index i from the array..
- // Fast but unsafe: only use if has_index(i) is true.
- const Value& get_existing(int i) const;
-
- // Erasing items from the array during iteration is in general
- // NOT safe. There is one special case, which is that the current
- // index-value pair can be erased as long as the iterator is then
- // checked for being at the end before being incremented.
- // For example:
- //
- // for (i = m.begin(); i != m.end(); ++i) {
- // if (ShouldErase(i->index(), i->value())) {
- // m.erase(i->index());
- // --i;
- // }
- // }
- //
- // Except in the specific case just described, elements must
- // not be erased from the array (including clearing the array)
- // while iterators are walking over the array. Otherwise,
- // the iterators could walk past the end of the array.
-
- // Erases the element at index i from the array.
- void erase(int i);
-
- // Erases the element at index i from the array.
- // Fast but unsafe: only use if has_index(i) is true.
- void erase_existing(int i);
-
private:
- template <typename U>
- std::pair<iterator, bool> InsertInternal(U&& v) {
- DebugCheckInvariants();
- std::pair<iterator, bool> p;
- if (has_index(v.index_)) {
- p = {dense_.get() + sparse_[v.index_], false};
- } else {
- p = {set_new(std::forward<U>(v).index_, std::forward<U>(v).second), true};
- }
+ iterator SetInternal(bool allow_existing, int i, const Value& v) {
DebugCheckInvariants();
- return p;
- }
-
- template <typename U>
- iterator SetInternal(bool allow_overwrite, int i, U&& v) { // NOLINT
- DebugCheckInvariants();
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
assert(false && "illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
// dereferencing an invalid pointer.
return begin();
}
- if (!allow_overwrite) {
+ if (!allow_existing) {
assert(!has_index(i));
create_index(i);
} else {
if (!has_index(i))
create_index(i);
}
- return set_existing(i, std::forward<U>(v)); // NOLINT
+ return SetExistingInternal(i, v);
}
- template <typename U>
- iterator SetExistingInternal(int i, U&& v) { // NOLINT
+ iterator SetExistingInternal(int i, const Value& v) {
DebugCheckInvariants();
assert(has_index(i));
- dense_[sparse_[i]].value() = std::forward<U>(v);
+ dense_[sparse_[i]].value_ = v;
DebugCheckInvariants();
- return dense_.get() + sparse_[i];
+ return dense_.data() + sparse_[i];
}
// Add the index i to the array.
@@ -321,7 +249,7 @@ class SparseArray {
// Initializes memory for elements [min, max).
void MaybeInitializeMemory(int min, int max) {
#if __has_feature(memory_sanitizer)
- __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]);
+ __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
#elif defined(RE2_ON_VALGRIND)
for (int i = min; i < max; i++) {
sparse_[i] = 0xababababU;
@@ -330,9 +258,8 @@ class SparseArray {
}
int size_ = 0;
- int max_size_ = 0;
- std::unique_ptr<int[]> sparse_;
- std::unique_ptr<IndexValue[]> dense_;
+ PODArray<int> sparse_;
+ PODArray<IndexValue> dense_;
};
template<typename Value>
@@ -341,109 +268,79 @@ SparseArray<Value>::SparseArray() = default;
template<typename Value>
SparseArray<Value>::SparseArray(const SparseArray& src)
: size_(src.size_),
- max_size_(src.max_size_),
- sparse_(new int[max_size_]),
- dense_(new IndexValue[max_size_]) {
- std::copy_n(src.sparse_.get(), max_size_, sparse_.get());
- std::copy_n(src.dense_.get(), max_size_, dense_.get());
+ sparse_(src.max_size()),
+ dense_(src.max_size()) {
+ std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
+ std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
}
template<typename Value>
-SparseArray<Value>::SparseArray(SparseArray&& src) /*noexcept*/ // NOLINT
+SparseArray<Value>::SparseArray(SparseArray&& src)
: size_(src.size_),
- max_size_(src.max_size_),
sparse_(std::move(src.sparse_)),
dense_(std::move(src.dense_)) {
src.size_ = 0;
- src.max_size_ = 0;
}
template<typename Value>
SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
+ // Construct these first for exception safety.
+ PODArray<int> a(src.max_size());
+ PODArray<IndexValue> b(src.max_size());
+
size_ = src.size_;
- max_size_ = src.max_size_;
- std::unique_ptr<int[]> a(new int[max_size_]);
- std::copy_n(src.sparse_.get(), src.max_size_, a.get());
sparse_ = std::move(a);
- std::unique_ptr<IndexValue[]> b(new IndexValue[max_size_]);
- std::copy_n(src.dense_.get(), src.max_size_, b.get());
dense_ = std::move(b);
+ std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
+ std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
return *this;
}
template<typename Value>
-SparseArray<Value>& SparseArray<Value>::operator=(
- SparseArray&& src) /*noexcept*/ { // NOLINT
+SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
size_ = src.size_;
- max_size_ = src.max_size_;
sparse_ = std::move(src.sparse_);
dense_ = std::move(src.dense_);
- // clear out the source
src.size_ = 0;
- src.max_size_ = 0;
return *this;
}
// IndexValue pairs: exposed in SparseArray::iterator.
template<typename Value>
class SparseArray<Value>::IndexValue {
- friend class SparseArray;
public:
- typedef int first_type;
- typedef Value second_type;
-
- IndexValue() {}
- IndexValue(int i, const Value& v) : index_(i), second(v) {}
- IndexValue(int i, Value&& v) : index_(i), second(std::move(v)) {}
-
int index() const { return index_; }
-
- Value& value() /*&*/ { return second; }
- const Value& value() const /*&*/ { return second; }
- //Value&& value() /*&&*/ { return std::move(second); } // NOLINT
+ Value& value() { return value_; }
+ const Value& value() const { return value_; }
private:
+ friend class SparseArray;
int index_;
-
- public:
- // Provide the data in the 'second' member so that the utilities
- // in map-util work.
- // TODO(billydonahue): 'second' is public for short-term compatibility.
- // Users will be transitioned to using value() accessor.
- Value second;
+ Value value_;
};
-template<typename Value>
-const typename SparseArray<Value>::IndexValue&
-SparseArray<Value>::iv(int i) const {
- assert(i >= 0);
- assert(i < size_);
- return dense_[i];
-}
-
// Change the maximum size of the array.
// Invalidates all iterators.
template<typename Value>
-void SparseArray<Value>::resize(int max_size) {
+void SparseArray<Value>::resize(int new_max_size) {
DebugCheckInvariants();
- if (max_size > max_size_) {
- std::unique_ptr<int[]> a(new int[max_size]);
- if (sparse_) {
- std::copy_n(sparse_.get(), max_size_, a.get());
- }
- sparse_ = std::move(a);
+ if (new_max_size > max_size()) {
+ const int old_max_size = max_size();
- std::unique_ptr<IndexValue[]> b(new IndexValue[max_size]);
- if (dense_) {
- std::copy_n(dense_.get(), max_size_, b.get());
- }
+ // Construct these first for exception safety.
+ PODArray<int> a(new_max_size);
+ PODArray<IndexValue> b(new_max_size);
+
+ std::copy_n(sparse_.data(), old_max_size, a.data());
+ std::copy_n(dense_.data(), old_max_size, b.data());
+
+ sparse_ = std::move(a);
dense_ = std::move(b);
- MaybeInitializeMemory(max_size_, max_size);
+ MaybeInitializeMemory(old_max_size, new_max_size);
}
- max_size_ = max_size;
- if (size_ > max_size_)
- size_ = max_size_;
+ if (size_ > new_max_size)
+ size_ = new_max_size;
DebugCheckInvariants();
}
@@ -451,8 +348,8 @@ void SparseArray<Value>::resize(int max_size) {
template<typename Value>
bool SparseArray<Value>::has_index(int i) const {
assert(i >= 0);
- assert(i < max_size_);
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+ assert(i < max_size());
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
return false;
}
// Unsigned comparison avoids checking sparse_[i] < 0.
@@ -461,47 +358,17 @@ bool SparseArray<Value>::has_index(int i) const {
}
template<typename Value>
-const Value& SparseArray<Value>::get_existing(int i) const {
- assert(has_index(i));
- return dense_[sparse_[i]].second;
-}
-
-template<typename Value>
-void SparseArray<Value>::erase(int i) {
- DebugCheckInvariants();
- if (has_index(i))
- erase_existing(i);
- DebugCheckInvariants();
-}
-
-template<typename Value>
-void SparseArray<Value>::erase_existing(int i) {
- DebugCheckInvariants();
- assert(has_index(i));
- int di = sparse_[i];
- if (di < size_ - 1) {
- dense_[di] = std::move(dense_[size_ - 1]);
- sparse_[dense_[di].index_] = di;
- }
- size_--;
- DebugCheckInvariants();
-}
-
-template<typename Value>
void SparseArray<Value>::create_index(int i) {
assert(!has_index(i));
- assert(size_ < max_size_);
+ assert(size_ < max_size());
sparse_[i] = size_;
dense_[size_].index_ = i;
size_++;
}
-template<typename Value> SparseArray<Value>::SparseArray(int max_size) {
- sparse_.reset(new int[max_size]);
- dense_.reset(new IndexValue[max_size]);
- size_ = 0;
+template<typename Value> SparseArray<Value>::SparseArray(int max_size) :
+ sparse_(max_size), dense_(max_size) {
MaybeInitializeMemory(size_, max_size);
- max_size_ = max_size;
DebugCheckInvariants();
}
@@ -511,8 +378,7 @@ template<typename Value> SparseArray<Value>::~SparseArray() {
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
assert(0 <= size_);
- assert(size_ <= max_size_);
- assert(size_ == 0 || sparse_ != NULL);
+ assert(size_ <= max_size());
}
// Comparison function for sorting.
diff --git a/util/sparse_set.h b/util/sparse_set.h
index 2efdd28..0d5ad51 100644
--- a/util/sparse_set.h
+++ b/util/sparse_set.h
@@ -54,7 +54,6 @@
#include <assert.h>
#include <stdint.h>
-#include <string.h>
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
@@ -62,6 +61,8 @@
#include <memory>
#include <utility>
+#include "util/pod_array.h"
+
namespace re2 {
template<typename Value>
@@ -86,27 +87,30 @@ class SparseSetT {
// Iterate over the set.
iterator begin() {
- return dense_.get();
+ return dense_.data();
}
iterator end() {
- return dense_.get() + size_;
+ return dense_.data() + size_;
}
const_iterator begin() const {
- return dense_.get();
+ return dense_.data();
}
const_iterator end() const {
- return dense_.get() + size_;
+ return dense_.data() + size_;
}
// Change the maximum size of the set.
// Invalidates all iterators.
- void resize(int max_size);
+ void resize(int new_max_size);
// Return the maximum size of the set.
// Indices can be in the range [0, max_size).
int max_size() const {
- return max_size_;
+ if (dense_.data() != NULL)
+ return dense_.size();
+ else
+ return 0;
}
// Clear the set.
@@ -138,7 +142,7 @@ class SparseSetT {
private:
iterator InsertInternal(bool allow_existing, int i) {
DebugCheckInvariants();
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
assert(false && "illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
@@ -153,7 +157,7 @@ class SparseSetT {
create_index(i);
}
DebugCheckInvariants();
- return dense_.get() + sparse_[i];
+ return dense_.data() + sparse_[i];
}
// Add the index i to the set.
@@ -170,7 +174,7 @@ class SparseSetT {
// Initializes memory for elements [min, max).
void MaybeInitializeMemory(int min, int max) {
#if __has_feature(memory_sanitizer)
- __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]);
+ __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
#elif defined(RE2_ON_VALGRIND)
for (int i = min; i < max; i++) {
sparse_[i] = 0xababababU;
@@ -179,9 +183,8 @@ class SparseSetT {
}
int size_ = 0;
- int max_size_ = 0;
- std::unique_ptr<int[]> sparse_;
- std::unique_ptr<int[]> dense_;
+ PODArray<int> sparse_;
+ PODArray<int> dense_;
};
template<typename Value>
@@ -190,26 +193,25 @@ SparseSetT<Value>::SparseSetT() = default;
// Change the maximum size of the set.
// Invalidates all iterators.
template<typename Value>
-void SparseSetT<Value>::resize(int max_size) {
+void SparseSetT<Value>::resize(int new_max_size) {
DebugCheckInvariants();
- if (max_size > max_size_) {
- std::unique_ptr<int[]> a(new int[max_size]);
- if (sparse_) {
- std::copy_n(sparse_.get(), max_size_, a.get());
- }
- sparse_ = std::move(a);
+ if (new_max_size > max_size()) {
+ const int old_max_size = max_size();
- std::unique_ptr<int[]> b(new int[max_size]);
- if (dense_) {
- std::copy_n(dense_.get(), max_size_, b.get());
- }
+ // Construct these first for exception safety.
+ PODArray<int> a(new_max_size);
+ PODArray<int> b(new_max_size);
+
+ std::copy_n(sparse_.data(), old_max_size, a.data());
+ std::copy_n(dense_.data(), old_max_size, b.data());
+
+ sparse_ = std::move(a);
dense_ = std::move(b);
- MaybeInitializeMemory(max_size_, max_size);
+ MaybeInitializeMemory(old_max_size, new_max_size);
}
- max_size_ = max_size;
- if (size_ > max_size_)
- size_ = max_size_;
+ if (size_ > new_max_size)
+ size_ = new_max_size;
DebugCheckInvariants();
}
@@ -217,8 +219,8 @@ void SparseSetT<Value>::resize(int max_size) {
template<typename Value>
bool SparseSetT<Value>::contains(int i) const {
assert(i >= 0);
- assert(i < max_size_);
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+ assert(i < max_size());
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
return false;
}
// Unsigned comparison avoids checking sparse_[i] < 0.
@@ -229,18 +231,15 @@ bool SparseSetT<Value>::contains(int i) const {
template<typename Value>
void SparseSetT<Value>::create_index(int i) {
assert(!contains(i));
- assert(size_ < max_size_);
+ assert(size_ < max_size());
sparse_[i] = size_;
dense_[size_] = i;
size_++;
}
-template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) {
- sparse_.reset(new int[max_size]);
- dense_.reset(new int[max_size]);
- size_ = 0;
+template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
+ sparse_(max_size), dense_(max_size) {
MaybeInitializeMemory(size_, max_size);
- max_size_ = max_size;
DebugCheckInvariants();
}
@@ -250,8 +249,7 @@ template<typename Value> SparseSetT<Value>::~SparseSetT() {
template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
assert(0 <= size_);
- assert(size_ <= max_size_);
- assert(size_ == 0 || sparse_ != NULL);
+ assert(size_ <= max_size());
}
// Comparison function for sorting.