Imported Upstream version 20190301upstream/20190301

author: DongHun Kwak <dh0128.kwak@samsung.com> 2019-09-25 15:35:57 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2019-09-25 15:35:57 +0900
commit: de99325ffe35c346d71d37e5f60b67e29313c1f2 (patch)
tree: 937eb9611b8c5952744ba895f711b70b0c565b58
parent: 07d538b47cc1d986c87b748fe5961beb1e8d2fbd (diff)
download: re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.gz
re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.bz2
re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.zip
20 files changed, 258 insertions, 415 deletions
diff --git a/.travis.yml b/.travis.yml
index 674cda5..0ddc4a9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -154,6 +154,17 @@ matrix:
             - clang-7
       env:
         - MATRIX_EVAL="CC=clang-7 CXX=clang++-7"
+    - os: linux
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main'
+              key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
+          packages:
+            - clang-8
+      env:
+        - MATRIX_EVAL="CC=clang-8 CXX=clang++-8"
 
 before_install:
   - eval "${MATRIX_EVAL}"
diff --git a/README b/README
index 681d6d7..d1ef431 100644
--- a/README
+++ b/README
@@ -28,7 +28,7 @@ under the BSD-style license found in the LICENSE file.
 RE2's native language is C++.
 
 A C wrapper is at https://github.com/marcomaggi/cre2/.
-An Erlang wrapper is at https://github.com/tuncer/re2/ and on Hex (hex.pm).
+An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
 An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
 A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
 An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
diff --git a/kokoro/bazel.sh b/kokoro/bazel.sh
new file mode 100755
index 0000000..6f25982
--- /dev/null
+++ b/kokoro/bazel.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -eux
+
+cd git/re2
+
+bazel clean
+bazel build --compilation_mode=dbg -- //...
+bazel test  --compilation_mode=dbg --test_output=errors -- //... \
+  -//:dfa_test \
+  -//:exhaustive1_test \
+  -//:exhaustive2_test \
+  -//:exhaustive3_test \
+  -//:exhaustive_test \
+  -//:random_test
+
+bazel clean
+bazel build --compilation_mode=opt -- //...
+bazel test  --compilation_mode=opt --test_output=errors -- //... \
+  -//:dfa_test \
+  -//:exhaustive1_test \
+  -//:exhaustive2_test \
+  -//:exhaustive3_test \
+  -//:exhaustive_test \
+  -//:random_test
+
+exit 0
diff --git a/kokoro/cmake.sh b/kokoro/cmake.sh
new file mode 100755
index 0000000..999fbfe
--- /dev/null
+++ b/kokoro/cmake.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -eux
+
+cd git/re2
+
+case "${KOKORO_JOB_NAME}" in
+  */windows-*)
+    CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64')
+    ;;
+  *)
+    CMAKE_G_A_FLAGS=()
+    # Work around a bug in older versions of bash. :/
+    set +u
+    ;;
+esac
+
+cmake -D CMAKE_BUILD_TYPE=Debug "${CMAKE_G_A_FLAGS[@]}" .
+cmake --build . --config Debug --clean-first
+ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
+
+cmake -D CMAKE_BUILD_TYPE=Release "${CMAKE_G_A_FLAGS[@]}" .
+cmake --build . --config Release --clean-first
+ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
+
+exit 0
diff --git a/kokoro/macos-bazel.sh b/kokoro/macos-bazel.sh
index 6f25982..e43c852 100755
--- a/kokoro/macos-bazel.sh
+++ b/kokoro/macos-bazel.sh
@@ -1,26 +1,4 @@
 #!/bin/bash
 set -eux
-
-cd git/re2
-
-bazel clean
-bazel build --compilation_mode=dbg -- //...
-bazel test  --compilation_mode=dbg --test_output=errors -- //... \
-  -//:dfa_test \
-  -//:exhaustive1_test \
-  -//:exhaustive2_test \
-  -//:exhaustive3_test \
-  -//:exhaustive_test \
-  -//:random_test
-
-bazel clean
-bazel build --compilation_mode=opt -- //...
-bazel test  --compilation_mode=opt --test_output=errors -- //... \
-  -//:dfa_test \
-  -//:exhaustive1_test \
-  -//:exhaustive2_test \
-  -//:exhaustive3_test \
-  -//:exhaustive_test \
-  -//:random_test
-
-exit 0
+bash git/re2/kokoro/bazel.sh
+exit $?
diff --git a/kokoro/macos-cmake.sh b/kokoro/macos-cmake.sh
index dbe94cb..ef4b7dc 100755
--- a/kokoro/macos-cmake.sh
+++ b/kokoro/macos-cmake.sh
@@ -1,14 +1,4 @@
 #!/bin/bash
 set -eux
-
-cd git/re2
-
-cmake -D CMAKE_BUILD_TYPE=Debug .
-cmake --build . --config Debug --clean-first
-ctest -C Debug --output-on-failure -E dfa\|exhaustive\|random
-
-cmake -D CMAKE_BUILD_TYPE=Release .
-cmake --build . --config Release --clean-first
-ctest -C Release --output-on-failure -E dfa\|exhaustive\|random
-
-exit 0
+bash git/re2/kokoro/cmake.sh
+exit $?
diff --git a/kokoro/ubuntu-bazel.sh b/kokoro/ubuntu-bazel.sh
index 6f25982..e43c852 100755
--- a/kokoro/ubuntu-bazel.sh
+++ b/kokoro/ubuntu-bazel.sh
@@ -1,26 +1,4 @@
 #!/bin/bash
 set -eux
-
-cd git/re2
-
-bazel clean
-bazel build --compilation_mode=dbg -- //...
-bazel test  --compilation_mode=dbg --test_output=errors -- //... \
-  -//:dfa_test \
-  -//:exhaustive1_test \
-  -//:exhaustive2_test \
-  -//:exhaustive3_test \
-  -//:exhaustive_test \
-  -//:random_test
-
-bazel clean
-bazel build --compilation_mode=opt -- //...
-bazel test  --compilation_mode=opt --test_output=errors -- //... \
-  -//:dfa_test \
-  -//:exhaustive1_test \
-  -//:exhaustive2_test \
-  -//:exhaustive3_test \
-  -//:exhaustive_test \
-  -//:random_test
-
-exit 0
+bash git/re2/kokoro/bazel.sh
+exit $?
diff --git a/kokoro/windows-bazel.bat b/kokoro/windows-bazel.bat
index 9ac3d5b..283f8d2 100755
--- a/kokoro/windows-bazel.bat
+++ b/kokoro/windows-bazel.bat
@@ -1,30 +1,2 @@
-CD git/re2 ^
-  || EXIT /B 1
-
-bazel clean ^
-  || EXIT /B 1
-bazel build --compilation_mode=dbg -- //... ^
-  || EXIT /B 1
-bazel test  --compilation_mode=dbg --test_output=errors -- //... ^
-  -//:dfa_test ^
-  -//:exhaustive1_test ^
-  -//:exhaustive2_test ^
-  -//:exhaustive3_test ^
-  -//:exhaustive_test ^
-  -//:random_test ^
-  || EXIT /B 1
-
-bazel clean ^
-  || EXIT /B 1
-bazel build --compilation_mode=opt -- //... ^
-  || EXIT /B 1
-bazel test  --compilation_mode=opt --test_output=errors -- //... ^
-  -//:dfa_test ^
-  -//:exhaustive1_test ^
-  -//:exhaustive2_test ^
-  -//:exhaustive3_test ^
-  -//:exhaustive_test ^
-  -//:random_test ^
-  || EXIT /B 1
-
-EXIT /B 0
+bash git/re2/kokoro/bazel.sh
+EXIT /B %ERRORLEVEL%
diff --git a/kokoro/windows-cmake.bat b/kokoro/windows-cmake.bat
index e926d88..77a4db9 100755
--- a/kokoro/windows-cmake.bat
+++ b/kokoro/windows-cmake.bat
@@ -1,18 +1,2 @@
-CD git/re2 ^
-  || EXIT /B 1
-
-cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 14 2015" -A x64 . ^
-  || EXIT /B 1
-cmake --build . --config Debug --clean-first ^
-  || EXIT /B 1
-ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random ^
-  || EXIT /B 1
-
-cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 14 2015" -A x64 . ^
-  || EXIT /B 1
-cmake --build . --config Release --clean-first ^
-  || EXIT /B 1
-ctest -C Release --output-on-failure -E dfa^|exhaustive^|random ^
-  || EXIT /B 1
-
-EXIT /B 0
+bash git/re2/kokoro/cmake.sh
+EXIT /B %ERRORLEVEL%
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 084b5c0..5e5d324 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -106,26 +106,32 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
   if (size == 0 || size > 999)
     return 0;
 
-  // Crudely limit the use of ., \p and \P.
-  // Otherwise, we will waste time on inputs that have long runs of Unicode
+  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
+  // Otherwise, we will waste time on inputs that have long runs of various
   // character classes. The fuzzer has shown itself to be easily capable of
   // generating such patterns that fall within the other limits, but result
   // in timeouts nonetheless. The marginal cost is high - even more so when
   // counted repetition is involved - whereas the marginal benefit is zero.
-  int dot = 0;
-  int backslash_p = 0;
+  // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
+  int char_class = 0;
+  int backslash_p = 0;  // very expensive, so handle specially
   for (size_t i = 0; i < size; i++) {
     if (data[i] == '.')
-      dot++;
+      char_class++;
     if (data[i] != '\\')
       continue;
     i++;
     if (i >= size)
       break;
+    if (data[i] == 'p' || data[i] == 'P' ||
+        data[i] == 'd' || data[i] == 'D' ||
+        data[i] == 's' || data[i] == 'S' ||
+        data[i] == 'w' || data[i] == 'W')
+      char_class++;
     if (data[i] == 'p' || data[i] == 'P')
       backslash_p++;
   }
-  if (dot > 99)
+  if (char_class > 9)
     return 0;
   if (backslash_p > 1)
     return 0;
diff --git a/re2/nfa.cc b/re2/nfa.cc
index c8c49a9..04d4c6f 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -238,8 +238,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
     // or we might not.  Even if not, it is necessary to have it,
     // so that we don't revisit id0 during the recursion.
     q->set_new(id, NULL);
-
-    Thread** tp = &q->find(id)->second;
+    Thread** tp = &q->get_existing(id);
     int j;
     Thread* t;
     Prog::Inst* ip = prog_->inst(id);
@@ -329,7 +328,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
   nextq->clear();
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
-    Thread* t = i->second;
+    Thread* t = i->value();
     if (t == NULL)
       continue;
 
@@ -364,7 +363,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
 
           Decref(t);
           for (++i; i != runq->end(); ++i)
-            Decref(i->second);
+            Decref(i->value());
           runq->clear();
           if (ip->greedy(prog_))
             return ip->out1();
@@ -403,7 +402,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
           // rest of the current Threadq.
           Decref(t);
           for (++i; i != runq->end(); ++i)
-            Decref(i->second);
+            Decref(i->value());
           runq->clear();
           return 0;
         }
@@ -505,7 +504,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
 
       fprintf(stderr, "%c:", c);
       for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
-        Thread* t = i->second;
+        Thread* t = i->value();
         if (t == NULL)
           continue;
         fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
@@ -586,7 +585,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
   }
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
-    Decref(i->second);
+    Decref(i->value());
 
   if (matched_) {
     for (int i = 0; i < nsubmatch; i++)
@@ -697,7 +696,7 @@ void Prog::Fanout(SparseArray<int>* fanout) {
   fanout->clear();
   fanout->set_new(start(), 0);
   for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
-    int* count = &i->second;
+    int* count = &i->value();
     reachable.clear();
     reachable.insert(i->index());
     for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 7d39290..edd2c48 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -244,7 +244,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
   if (anchor_end())
     kind = kFullMatch;
 
-  uint8_t* nodes = onepass_nodes_;
+  uint8_t* nodes = onepass_nodes_.data();
   int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
   // start() is always mapped to the zeroth OneState.
   OneState* state = IndexToNode(nodes, statesize, 0);
@@ -383,7 +383,7 @@ struct InstCond {
 // Constructs and saves corresponding one-pass NFA on success.
 bool Prog::IsOnePass() {
   if (did_onepass_)
-    return onepass_nodes_ != NULL;
+    return onepass_nodes_.data() != NULL;
   did_onepass_ = true;
 
   if (start() == 0)  // no match
@@ -612,8 +612,8 @@ bool Prog::IsOnePass() {
   }
 
   dfa_mem_ -= nalloc*statesize;
-  onepass_nodes_ = new uint8_t[nalloc*statesize];
-  memmove(onepass_nodes_, nodes.data(), nalloc*statesize);
+  onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize);
+  memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize);
   return true;
 
 fail:
diff --git a/re2/parse.cc b/re2/parse.cc
index 354eafe..c8dea7e 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -900,7 +900,7 @@ struct Frame {
   int nsub;
   int round;
   std::vector<Splice> splices;
-  std::vector<Splice>::iterator spliceiter;
+  int spliceidx;
 };
 
 // Bundled into a class for friend access to Regexp without needing to declare
@@ -938,15 +938,15 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
     auto& nsub = stk.back().nsub;
     auto& round = stk.back().round;
     auto& splices = stk.back().splices;
-    auto& spliceiter = stk.back().spliceiter;
+    auto& spliceidx = stk.back().spliceidx;
 
     if (splices.empty()) {
       // Advance to the next round of factoring. Note that this covers
       // the initialised state: when splices is empty and round is 0.
       round++;
-    } else if (spliceiter != splices.end()) {
+    } else if (spliceidx < static_cast<int>(splices.size())) {
       // We have at least one more Splice to factor. Recurse logically.
-      stk.emplace_back(spliceiter->sub, spliceiter->nsub);
+      stk.emplace_back(splices[spliceidx].sub, splices[spliceidx].nsub);
       continue;
     } else {
       // We have no more Splices to factor. Apply them.
@@ -1007,8 +1007,8 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
           // (Note that references will be invalidated!)
           int nsuffix = nsub;
           stk.pop_back();
-          stk.back().spliceiter->nsuffix = nsuffix;
-          ++stk.back().spliceiter;
+          stk.back().splices[stk.back().spliceidx].nsuffix = nsuffix;
+          ++stk.back().spliceidx;
           continue;
         }
       default:
@@ -1016,11 +1016,11 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
         break;
     }
 
-    // Set spliceiter depending on whether we have Splices to factor.
+    // Set spliceidx depending on whether we have Splices to factor.
     if (splices.empty() || round == 3) {
-      spliceiter = splices.end();
+      spliceidx = static_cast<int>(splices.size());
     } else {
-      spliceiter = splices.begin();
+      spliceidx = 0;
     }
   }
 }
diff --git a/re2/prog.cc b/re2/prog.cc
index fa03af9..9729aa4 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -112,7 +112,6 @@ Prog::Prog()
     first_byte_(-1),
     flags_(0),
     list_count_(0),
-    onepass_nodes_(NULL),
     dfa_mem_(0),
     dfa_first_(NULL),
     dfa_longest_(NULL) {
@@ -121,7 +120,6 @@ Prog::Prog()
 Prog::~Prog() {
   DeleteDFA(dfa_longest_);
   DeleteDFA(dfa_first_);
-  delete[] onepass_nodes_;
 }
 
 typedef SparseSet Workq;
diff --git a/re2/prog.h b/re2/prog.h
index 268ab9d..edac935 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -397,7 +397,7 @@ class Prog {
   int inst_count_[kNumInst];  // count of instructions by opcode
 
   PODArray<Inst> inst_;     // pointer to instruction array
-  uint8_t* onepass_nodes_;  // data for OnePass nodes
+  PODArray<uint8_t> onepass_nodes_;  // data for OnePass nodes
 
   int64_t dfa_mem_;         // Maximum memory for DFAs.
   DFA* dfa_first_;          // DFA cached for kFirstMatch/kManyMatch
diff --git a/re2/re2.cc b/re2/re2.cc
index 4e42f8b..1529807 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -283,7 +283,7 @@ static int Fanout(Prog* prog, std::map<int, int>* histogram) {
   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
     // TODO(junyer): Optimise this?
     int bucket = 0;
-    while (1 << bucket < i->second) {
+    while (1 << bucket < i->value()) {
       bucket++;
     }
     (*histogram)[bucket]++;
diff --git a/re2/re2.h b/re2/re2.h
index f846029..216347d 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -696,7 +696,7 @@ class RE2 {
   };
 
   // Returns the options set in the constructor.
-  const Options& options() const { return options_; };
+  const Options& options() const { return options_; }
 
   // Argument converters; see below.
   static inline Arg CRadix(short* x);
@@ -799,22 +799,22 @@ class RE2::Arg {
   Arg(type* p) : arg_(p), parser_(name) {} \
   Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
 
-  MAKE_PARSER(char,               parse_char);
-  MAKE_PARSER(signed char,        parse_schar);
-  MAKE_PARSER(unsigned char,      parse_uchar);
-  MAKE_PARSER(float,              parse_float);
-  MAKE_PARSER(double,             parse_double);
-  MAKE_PARSER(string,             parse_string);
-  MAKE_PARSER(StringPiece,        parse_stringpiece);
-
-  MAKE_PARSER(short,              parse_short);
-  MAKE_PARSER(unsigned short,     parse_ushort);
-  MAKE_PARSER(int,                parse_int);
-  MAKE_PARSER(unsigned int,       parse_uint);
-  MAKE_PARSER(long,               parse_long);
-  MAKE_PARSER(unsigned long,      parse_ulong);
-  MAKE_PARSER(long long,          parse_longlong);
-  MAKE_PARSER(unsigned long long, parse_ulonglong);
+  MAKE_PARSER(char,               parse_char)
+  MAKE_PARSER(signed char,        parse_schar)
+  MAKE_PARSER(unsigned char,      parse_uchar)
+  MAKE_PARSER(float,              parse_float)
+  MAKE_PARSER(double,             parse_double)
+  MAKE_PARSER(string,             parse_string)
+  MAKE_PARSER(StringPiece,        parse_stringpiece)
+
+  MAKE_PARSER(short,              parse_short)
+  MAKE_PARSER(unsigned short,     parse_ushort)
+  MAKE_PARSER(int,                parse_int)
+  MAKE_PARSER(unsigned int,       parse_uint)
+  MAKE_PARSER(long,               parse_long)
+  MAKE_PARSER(unsigned long,      parse_ulong)
+  MAKE_PARSER(long long,          parse_longlong)
+  MAKE_PARSER(unsigned long long, parse_ulonglong)
 
 #undef MAKE_PARSER
 
@@ -849,16 +849,16 @@ class RE2::Arg {
  public:                                                                   \
   static bool parse_##name##_hex(const char* str, size_t n, void* dest);   \
   static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
-  static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
-
-  DECLARE_INTEGER_PARSER(short);
-  DECLARE_INTEGER_PARSER(ushort);
-  DECLARE_INTEGER_PARSER(int);
-  DECLARE_INTEGER_PARSER(uint);
-  DECLARE_INTEGER_PARSER(long);
-  DECLARE_INTEGER_PARSER(ulong);
-  DECLARE_INTEGER_PARSER(longlong);
-  DECLARE_INTEGER_PARSER(ulonglong);
+  static bool parse_##name##_cradix(const char* str, size_t n, void* dest);
+
+  DECLARE_INTEGER_PARSER(short)
+  DECLARE_INTEGER_PARSER(ushort)
+  DECLARE_INTEGER_PARSER(int)
+  DECLARE_INTEGER_PARSER(uint)
+  DECLARE_INTEGER_PARSER(long)
+  DECLARE_INTEGER_PARSER(ulong)
+  DECLARE_INTEGER_PARSER(longlong)
+  DECLARE_INTEGER_PARSER(ulonglong)
 
 #undef DECLARE_INTEGER_PARSER
 
diff --git a/re2/stringpiece.h b/re2/stringpiece.h
index 1db7a2a..1d9c2d3 100644
--- a/re2/stringpiece.h
+++ b/re2/stringpiece.h
@@ -19,12 +19,20 @@
 //
 // Arghh!  I wish C++ literals were "string".
 
+// Doing this simplifies the logic below.
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
 #include <stddef.h>
 #include <string.h>
 #include <algorithm>
 #include <iosfwd>
 #include <iterator>
 #include <string>
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+#include <string_view>
+#endif
 
 namespace re2 {
 
@@ -49,6 +57,10 @@ class StringPiece {
   // expected.
   StringPiece()
       : data_(NULL), size_(0) {}
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+  StringPiece(const std::string_view& str)
+      : data_(str.data()), size_(str.size()) {}
+#endif
   StringPiece(const std::string& str)
       : data_(str.data()), size_(str.size()) {}
   StringPiece(const char* str)
diff --git a/util/sparse_array.h b/util/sparse_array.h
index 86d4d50..c81c9f3 100644
--- a/util/sparse_array.h
+++ b/util/sparse_array.h
@@ -55,7 +55,7 @@
 
 // IMPLEMENTATION
 //
-// SparseArray is an array dense_ and an array sparse_, both of size max_size_.
+// SparseArray is an array dense_ and an array sparse_ of identical size.
 // At any point, the number of elements in the sparse array is size_.
 //
 // The array dense_ contains the size_ elements in the sparse array (with
@@ -80,15 +80,11 @@
 // To insert a new entry, set sparse_[i] to size_,
 // initialize dense_[size_], and then increment size_.
 //
-// Deletion of specific values from the array is implemented by
-// swapping dense_[size_-1] and the dense_ being deleted and then
-// updating the appropriate sparse_ entries.
-//
 // To make the sparse array as efficient as possible for non-primitive types,
 // elements may or may not be destroyed when they are deleted from the sparse
-// array through a call to erase(), erase_existing() or resize(). They
-// immediately become inaccessible, but they are only guaranteed to be
-// destroyed when the SparseArray destructor is called.
+// array through a call to resize(). They immediately become inaccessible, but
+// they are only guaranteed to be destroyed when the SparseArray destructor is
+// called.
 //
 // A moved-from SparseArray will be empty.
 
@@ -99,15 +95,15 @@
 
 #include <assert.h>
 #include <stdint.h>
-#include <string.h>
 #if __has_feature(memory_sanitizer)
 #include <sanitizer/msan_interface.h>
 #endif
 #include <algorithm>
 #include <memory>
-#include <type_traits>
 #include <utility>
 
+#include "util/pod_array.h"
+
 namespace re2 {
 
 template<typename Value>
@@ -119,20 +115,15 @@ class SparseArray {
 
   // IndexValue pairs: exposed in SparseArray::iterator.
   class IndexValue;
-  static_assert(std::is_trivially_destructible<IndexValue>::value,
-                "IndexValue must be trivially destructible");
 
-  typedef IndexValue value_type;
   typedef IndexValue* iterator;
   typedef const IndexValue* const_iterator;
 
   SparseArray(const SparseArray& src);
-  SparseArray(SparseArray&& src) /*noexcept*/;
+  SparseArray(SparseArray&& src);
 
   SparseArray& operator=(const SparseArray& src);
-  SparseArray& operator=(SparseArray&& src) /*noexcept*/;
-
-  const IndexValue& iv(int i) const;
+  SparseArray& operator=(SparseArray&& src);
 
   // Return the number of entries in the array.
   int size() const {
@@ -146,27 +137,30 @@ class SparseArray {
 
   // Iterate over the array.
   iterator begin() {
-    return dense_.get();
+    return dense_.data();
   }
   iterator end() {
-    return dense_.get() + size_;
+    return dense_.data() + size_;
   }
 
   const_iterator begin() const {
-    return dense_.get();
+    return dense_.data();
   }
   const_iterator end() const {
-    return dense_.get() + size_;
+    return dense_.data() + size_;
   }
 
   // Change the maximum size of the array.
   // Invalidates all iterators.
-  void resize(int max_size);
+  void resize(int new_max_size);
 
   // Return the maximum size of the array.
   // Indices can be in the range [0, max_size).
   int max_size() const {
-    return max_size_;
+    if (dense_.data() != NULL)
+      return dense_.size();
+    else
+      return 0;
   }
 
   // Clear the array.
@@ -188,122 +182,56 @@ class SparseArray {
   iterator set(int i, const Value& v) {
     return SetInternal(true, i, v);
   }
-  iterator set(int i, Value&& v) {  // NOLINT
-    return SetInternal(true, i, std::move(v));
-  }
-
-  std::pair<iterator, bool> insert(const value_type& v) {
-    return InsertInternal(v);
-  }
-  std::pair<iterator, bool> insert(value_type&& v) {  // NOLINT
-    return InsertInternal(std::move(v));
-  }
-
-  template <typename... Args>
-  std::pair<iterator, bool> emplace(Args&&... args) {  // NOLINT
-    return InsertInternal(value_type(std::forward<Args>(args)...));
-  }
-
-  iterator find(int i) {
-    if (has_index(i))
-      return dense_.get() + sparse_[i];
-    return end();
-  }
 
-  const_iterator find(int i) const {
-    if (has_index(i))
-      return dense_.get() + sparse_[i];
-    return end();
+  // Set the value at new index i to v.
+  // Fast but unsafe: only use if has_index(i) is false.
+  iterator set_new(int i, const Value& v) {
+    return SetInternal(false, i, v);
   }
 
-  // Change the value at index i to v.
+  // Set the value at index i to v.
   // Fast but unsafe: only use if has_index(i) is true.
   iterator set_existing(int i, const Value& v) {
     return SetExistingInternal(i, v);
   }
-  iterator set_existing(int i, Value&& v) {  // NOLINT
-    return SetExistingInternal(i, std::move(v));
-  }
 
-  // Set the value at the new index i to v.
-  // Fast but unsafe: only use if has_index(i) is false.
-  iterator set_new(int i, const Value& v) {
-    return SetInternal(false, i, v);
+  // Get the value at index i.
+  // Fast but unsafe: only use if has_index(i) is true.
+  Value& get_existing(int i) {
+    assert(has_index(i));
+    return dense_[sparse_[i]].value_;
   }
-  iterator set_new(int i, Value&& v) {  // NOLINT
-    return SetInternal(false, i, std::move(v));
+  const Value& get_existing(int i) const {
+    assert(has_index(i));
+    return dense_[sparse_[i]].value_;
   }
 
-  // Get the value at index i from the array..
-  // Fast but unsafe: only use if has_index(i) is true.
-  const Value& get_existing(int i) const;
-
-  // Erasing items from the array during iteration is in general
-  // NOT safe.  There is one special case, which is that the current
-  // index-value pair can be erased as long as the iterator is then
-  // checked for being at the end before being incremented.
-  // For example:
-  //
-  //   for (i = m.begin(); i != m.end(); ++i) {
-  //     if (ShouldErase(i->index(), i->value())) {
-  //       m.erase(i->index());
-  //       --i;
-  //     }
-  //   }
-  //
-  // Except in the specific case just described, elements must
-  // not be erased from the array (including clearing the array)
-  // while iterators are walking over the array.  Otherwise,
-  // the iterators could walk past the end of the array.
-
-  // Erases the element at index i from the array.
-  void erase(int i);
-
-  // Erases the element at index i from the array.
-  // Fast but unsafe: only use if has_index(i) is true.
-  void erase_existing(int i);
-
  private:
-  template <typename U>
-  std::pair<iterator, bool> InsertInternal(U&& v) {
-    DebugCheckInvariants();
-    std::pair<iterator, bool> p;
-    if (has_index(v.index_)) {
-      p = {dense_.get() + sparse_[v.index_], false};
-    } else {
-      p = {set_new(std::forward<U>(v).index_, std::forward<U>(v).second), true};
-    }
+  iterator SetInternal(bool allow_existing, int i, const Value& v) {
     DebugCheckInvariants();
-    return p;
-  }
-
-  template <typename U>
-  iterator SetInternal(bool allow_overwrite, int i, U&& v) {  // NOLINT
-    DebugCheckInvariants();
-    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
       assert(false && "illegal index");
       // Semantically, end() would be better here, but we already know
       // the user did something stupid, so begin() insulates them from
       // dereferencing an invalid pointer.
       return begin();
     }
-    if (!allow_overwrite) {
+    if (!allow_existing) {
       assert(!has_index(i));
       create_index(i);
     } else {
       if (!has_index(i))
         create_index(i);
     }
-    return set_existing(i, std::forward<U>(v));  // NOLINT
+    return SetExistingInternal(i, v);
   }
 
-  template <typename U>
-  iterator SetExistingInternal(int i, U&& v) {  // NOLINT
+  iterator SetExistingInternal(int i, const Value& v) {
     DebugCheckInvariants();
     assert(has_index(i));
-    dense_[sparse_[i]].value() = std::forward<U>(v);
+    dense_[sparse_[i]].value_ = v;
     DebugCheckInvariants();
-    return dense_.get() + sparse_[i];
+    return dense_.data() + sparse_[i];
   }
 
   // Add the index i to the array.
@@ -321,7 +249,7 @@ class SparseArray {
   // Initializes memory for elements [min, max).
   void MaybeInitializeMemory(int min, int max) {
 #if __has_feature(memory_sanitizer)
-    __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]);
+    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
 #elif defined(RE2_ON_VALGRIND)
     for (int i = min; i < max; i++) {
       sparse_[i] = 0xababababU;
@@ -330,9 +258,8 @@ class SparseArray {
   }
 
   int size_ = 0;
-  int max_size_ = 0;
-  std::unique_ptr<int[]> sparse_;
-  std::unique_ptr<IndexValue[]> dense_;
+  PODArray<int> sparse_;
+  PODArray<IndexValue> dense_;
 };
 
 template<typename Value>
@@ -341,109 +268,79 @@ SparseArray<Value>::SparseArray() = default;
 template<typename Value>
 SparseArray<Value>::SparseArray(const SparseArray& src)
     : size_(src.size_),
-      max_size_(src.max_size_),
-      sparse_(new int[max_size_]),
-      dense_(new IndexValue[max_size_]) {
-  std::copy_n(src.sparse_.get(), max_size_, sparse_.get());
-  std::copy_n(src.dense_.get(), max_size_, dense_.get());
+      sparse_(src.max_size()),
+      dense_(src.max_size()) {
+  std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
+  std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
 }
 
 template<typename Value>
-SparseArray<Value>::SparseArray(SparseArray&& src) /*noexcept*/  // NOLINT
+SparseArray<Value>::SparseArray(SparseArray&& src)
     : size_(src.size_),
-      max_size_(src.max_size_),
       sparse_(std::move(src.sparse_)),
       dense_(std::move(src.dense_)) {
   src.size_ = 0;
-  src.max_size_ = 0;
 }
 
 template<typename Value>
 SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
+  // Construct these first for exception safety.
+  PODArray<int> a(src.max_size());
+  PODArray<IndexValue> b(src.max_size());
+
   size_ = src.size_;
-  max_size_ = src.max_size_;
-  std::unique_ptr<int[]> a(new int[max_size_]);
-  std::copy_n(src.sparse_.get(), src.max_size_, a.get());
   sparse_ = std::move(a);
-  std::unique_ptr<IndexValue[]> b(new IndexValue[max_size_]);
-  std::copy_n(src.dense_.get(), src.max_size_, b.get());
   dense_ = std::move(b);
+  std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
+  std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
   return *this;
 }
 
 template<typename Value>
-SparseArray<Value>& SparseArray<Value>::operator=(
-    SparseArray&& src) /*noexcept*/ {  // NOLINT
+SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
   size_ = src.size_;
-  max_size_ = src.max_size_;
   sparse_ = std::move(src.sparse_);
   dense_ = std::move(src.dense_);
-  // clear out the source
   src.size_ = 0;
-  src.max_size_ = 0;
   return *this;
 }
 
 // IndexValue pairs: exposed in SparseArray::iterator.
 template<typename Value>
 class SparseArray<Value>::IndexValue {
-  friend class SparseArray;
  public:
-  typedef int first_type;
-  typedef Value second_type;
-
-  IndexValue() {}
-  IndexValue(int i, const Value& v) : index_(i), second(v) {}
-  IndexValue(int i, Value&& v) : index_(i), second(std::move(v)) {}
-
   int index() const { return index_; }
-
-  Value& value() /*&*/ { return second; }
-  const Value& value() const /*&*/ { return second; }
-  //Value&& value() /*&&*/ { return std::move(second); }  // NOLINT
+  Value& value() { return value_; }
+  const Value& value() const { return value_; }
 
  private:
+  friend class SparseArray;
   int index_;
-
- public:
-  // Provide the data in the 'second' member so that the utilities
-  // in map-util work.
-  // TODO(billydonahue): 'second' is public for short-term compatibility.
-  // Users will be transitioned to using value() accessor.
-  Value second;
+  Value value_;
 };
 
-template<typename Value>
-const typename SparseArray<Value>::IndexValue&
-SparseArray<Value>::iv(int i) const {
-  assert(i >= 0);
-  assert(i < size_);
-  return dense_[i];
-}
-
 // Change the maximum size of the array.
 // Invalidates all iterators.
 template<typename Value>
-void SparseArray<Value>::resize(int max_size) {
+void SparseArray<Value>::resize(int new_max_size) {
   DebugCheckInvariants();
-  if (max_size > max_size_) {
-    std::unique_ptr<int[]> a(new int[max_size]);
-    if (sparse_) {
-      std::copy_n(sparse_.get(), max_size_, a.get());
-    }
-    sparse_ = std::move(a);
+  if (new_max_size > max_size()) {
+    const int old_max_size = max_size();
 
-    std::unique_ptr<IndexValue[]> b(new IndexValue[max_size]);
-    if (dense_) {
-      std::copy_n(dense_.get(), max_size_, b.get());
-    }
+    // Construct these first for exception safety.
+    PODArray<int> a(new_max_size);
+    PODArray<IndexValue> b(new_max_size);
+
+    std::copy_n(sparse_.data(), old_max_size, a.data());
+    std::copy_n(dense_.data(), old_max_size, b.data());
+
+    sparse_ = std::move(a);
     dense_ = std::move(b);
 
-    MaybeInitializeMemory(max_size_, max_size);
+    MaybeInitializeMemory(old_max_size, new_max_size);
   }
-  max_size_ = max_size;
-  if (size_ > max_size_)
-    size_ = max_size_;
+  if (size_ > new_max_size)
+    size_ = new_max_size;
   DebugCheckInvariants();
 }
 
@@ -451,8 +348,8 @@ void SparseArray<Value>::resize(int max_size) {
 template<typename Value>
 bool SparseArray<Value>::has_index(int i) const {
   assert(i >= 0);
-  assert(i < max_size_);
-  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+  assert(i < max_size());
+  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
     return false;
   }
   // Unsigned comparison avoids checking sparse_[i] < 0.
@@ -461,47 +358,17 @@ bool SparseArray<Value>::has_index(int i) const {
 }
 
 template<typename Value>
-const Value& SparseArray<Value>::get_existing(int i) const {
-  assert(has_index(i));
-  return dense_[sparse_[i]].second;
-}
-
-template<typename Value>
-void SparseArray<Value>::erase(int i) {
-  DebugCheckInvariants();
-  if (has_index(i))
-    erase_existing(i);
-  DebugCheckInvariants();
-}
-
-template<typename Value>
-void SparseArray<Value>::erase_existing(int i) {
-  DebugCheckInvariants();
-  assert(has_index(i));
-  int di = sparse_[i];
-  if (di < size_ - 1) {
-    dense_[di] = std::move(dense_[size_ - 1]);
-    sparse_[dense_[di].index_] = di;
-  }
-  size_--;
-  DebugCheckInvariants();
-}
-
-template<typename Value>
 void SparseArray<Value>::create_index(int i) {
   assert(!has_index(i));
-  assert(size_ < max_size_);
+  assert(size_ < max_size());
   sparse_[i] = size_;
   dense_[size_].index_ = i;
   size_++;
 }
 
-template<typename Value> SparseArray<Value>::SparseArray(int max_size) {
-  sparse_.reset(new int[max_size]);
-  dense_.reset(new IndexValue[max_size]);
-  size_ = 0;
+template<typename Value> SparseArray<Value>::SparseArray(int max_size) :
+    sparse_(max_size), dense_(max_size) {
   MaybeInitializeMemory(size_, max_size);
-  max_size_ = max_size;
   DebugCheckInvariants();
 }
 
@@ -511,8 +378,7 @@ template<typename Value> SparseArray<Value>::~SparseArray() {
 
 template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
   assert(0 <= size_);
-  assert(size_ <= max_size_);
-  assert(size_ == 0 || sparse_ != NULL);
+  assert(size_ <= max_size());
 }
 
 // Comparison function for sorting.
diff --git a/util/sparse_set.h b/util/sparse_set.h
index 2efdd28..0d5ad51 100644
--- a/util/sparse_set.h
+++ b/util/sparse_set.h
@@ -54,7 +54,6 @@
 
 #include <assert.h>
 #include <stdint.h>
-#include <string.h>
 #if __has_feature(memory_sanitizer)
 #include <sanitizer/msan_interface.h>
 #endif
@@ -62,6 +61,8 @@
 #include <memory>
 #include <utility>
 
+#include "util/pod_array.h"
+
 namespace re2 {
 
 template<typename Value>
@@ -86,27 +87,30 @@ class SparseSetT {
 
   // Iterate over the set.
   iterator begin() {
-    return dense_.get();
+    return dense_.data();
   }
   iterator end() {
-    return dense_.get() + size_;
+    return dense_.data() + size_;
   }
 
   const_iterator begin() const {
-    return dense_.get();
+    return dense_.data();
   }
   const_iterator end() const {
-    return dense_.get() + size_;
+    return dense_.data() + size_;
   }
 
   // Change the maximum size of the set.
   // Invalidates all iterators.
-  void resize(int max_size);
+  void resize(int new_max_size);
 
   // Return the maximum size of the set.
   // Indices can be in the range [0, max_size).
   int max_size() const {
-    return max_size_;
+    if (dense_.data() != NULL)
+      return dense_.size();
+    else
+      return 0;
   }
 
   // Clear the set.
@@ -138,7 +142,7 @@ class SparseSetT {
  private:
   iterator InsertInternal(bool allow_existing, int i) {
     DebugCheckInvariants();
-    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
       assert(false && "illegal index");
       // Semantically, end() would be better here, but we already know
       // the user did something stupid, so begin() insulates them from
@@ -153,7 +157,7 @@ class SparseSetT {
         create_index(i);
     }
     DebugCheckInvariants();
-    return dense_.get() + sparse_[i];
+    return dense_.data() + sparse_[i];
   }
 
   // Add the index i to the set.
@@ -170,7 +174,7 @@ class SparseSetT {
   // Initializes memory for elements [min, max).
   void MaybeInitializeMemory(int min, int max) {
 #if __has_feature(memory_sanitizer)
-    __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]);
+    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
 #elif defined(RE2_ON_VALGRIND)
     for (int i = min; i < max; i++) {
       sparse_[i] = 0xababababU;
@@ -179,9 +183,8 @@ class SparseSetT {
   }
 
   int size_ = 0;
-  int max_size_ = 0;
-  std::unique_ptr<int[]> sparse_;
-  std::unique_ptr<int[]> dense_;
+  PODArray<int> sparse_;
+  PODArray<int> dense_;
 };
 
 template<typename Value>
@@ -190,26 +193,25 @@ SparseSetT<Value>::SparseSetT() = default;
 // Change the maximum size of the set.
 // Invalidates all iterators.
 template<typename Value>
-void SparseSetT<Value>::resize(int max_size) {
+void SparseSetT<Value>::resize(int new_max_size) {
   DebugCheckInvariants();
-  if (max_size > max_size_) {
-    std::unique_ptr<int[]> a(new int[max_size]);
-    if (sparse_) {
-      std::copy_n(sparse_.get(), max_size_, a.get());
-    }
-    sparse_ = std::move(a);
+  if (new_max_size > max_size()) {
+    const int old_max_size = max_size();
 
-    std::unique_ptr<int[]> b(new int[max_size]);
-    if (dense_) {
-      std::copy_n(dense_.get(), max_size_, b.get());
-    }
+    // Construct these first for exception safety.
+    PODArray<int> a(new_max_size);
+    PODArray<int> b(new_max_size);
+
+    std::copy_n(sparse_.data(), old_max_size, a.data());
+    std::copy_n(dense_.data(), old_max_size, b.data());
+
+    sparse_ = std::move(a);
     dense_ = std::move(b);
 
-    MaybeInitializeMemory(max_size_, max_size);
+    MaybeInitializeMemory(old_max_size, new_max_size);
   }
-  max_size_ = max_size;
-  if (size_ > max_size_)
-    size_ = max_size_;
+  if (size_ > new_max_size)
+    size_ = new_max_size;
   DebugCheckInvariants();
 }
 
@@ -217,8 +219,8 @@ void SparseSetT<Value>::resize(int max_size) {
 template<typename Value>
 bool SparseSetT<Value>::contains(int i) const {
   assert(i >= 0);
-  assert(i < max_size_);
-  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
+  assert(i < max_size());
+  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
     return false;
   }
   // Unsigned comparison avoids checking sparse_[i] < 0.
@@ -229,18 +231,15 @@ bool SparseSetT<Value>::contains(int i) const {
 template<typename Value>
 void SparseSetT<Value>::create_index(int i) {
   assert(!contains(i));
-  assert(size_ < max_size_);
+  assert(size_ < max_size());
   sparse_[i] = size_;
   dense_[size_] = i;
   size_++;
 }
 
-template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) {
-  sparse_.reset(new int[max_size]);
-  dense_.reset(new int[max_size]);
-  size_ = 0;
+template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
+    sparse_(max_size), dense_(max_size) {
   MaybeInitializeMemory(size_, max_size);
-  max_size_ = max_size;
   DebugCheckInvariants();
 }
 
@@ -250,8 +249,7 @@ template<typename Value> SparseSetT<Value>::~SparseSetT() {
 
 template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
   assert(0 <= size_);
-  assert(size_ <= max_size_);
-  assert(size_ == 0 || sparse_ != NULL);
+  assert(size_ <= max_size());
 }
 
 // Comparison function for sorting.
author	DongHun Kwak <dh0128.kwak@samsung.com>	2019-09-25 15:35:57 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2019-09-25 15:35:57 +0900
commit	de99325ffe35c346d71d37e5f60b67e29313c1f2 (patch)
tree	937eb9611b8c5952744ba895f711b70b0c565b58
parent	07d538b47cc1d986c87b748fe5961beb1e8d2fbd (diff)
download	re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.gz re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.tar.bz2 re2-de99325ffe35c346d71d37e5f60b67e29313c1f2.zip