Imported Upstream version 20160301upstream/20160301

Change-Id: I33831129b7d3a860e34c551b415ede10da350c73 Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
author: DongHun Kwak <dh0128.kwak@samsung.com> 2016-11-21 16:57:18 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2016-11-21 16:57:18 +0900
commit: 4dfe393027cfcfe844a8c8b97209a714904308c4 (patch)
tree: d94a05252de9becb345ed10d7c3cfed598005923
parent: 485249b5a02cf59571cde61f83d10a6a9ec36b3d (diff)
download: re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.gz
re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.bz2
re2-4dfe393027cfcfe844a8c8b97209a714904308c4.zip
18 files changed, 178 insertions, 311 deletions
diff --git a/BUILD b/BUILD
index 2fb0d22..2636631 100644
--- a/BUILD
+++ b/BUILD
@@ -36,7 +36,6 @@ cc_library(
         "re2/unicode_groups.cc",
         "re2/unicode_groups.h",
         "re2/walker-inl.h",
-        "util/atomicops.h",
         "util/flags.h",
         "util/hash.cc",
         "util/logging.cc",
@@ -103,8 +102,8 @@ re2_test("filtered_re2_test")
 re2_test("mimics_pcre_test")
 re2_test("parse_test")
 re2_test("possible_match_test")
-re2_test("re2_test")
 re2_test("re2_arg_test")
+re2_test("re2_test")
 re2_test("regexp_test")
 re2_test("required_prefix_test")
 re2_test("search_test")
@@ -112,11 +111,11 @@ re2_test("set_test")
 re2_test("simplify_test")
 re2_test("string_generator_test")
 
-re2_test("dfa_test")
-re2_test("exhaustive1_test")
-re2_test("exhaustive2_test")
-re2_test("exhaustive3_test")
-re2_test("exhaustive_test")
+re2_test("dfa_test", size="large")
+re2_test("exhaustive1_test", size="large")
+re2_test("exhaustive2_test", size="large")
+re2_test("exhaustive3_test", size="large")
+re2_test("exhaustive_test", size="large")
 re2_test("random_test", size="large")
 
 # TODO: Add support for regexp_benchmark.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c980df..3ed718f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,11 +11,19 @@ option(USEPCRE "use PCRE in tests and benchmarks" OFF)
 
 set(EXTRA_TARGET_LINK_LIBRARIES)
 
+if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+  if(MSVC_VERSION LESS 1800)
+    message(FATAL_ERROR "you need Visual Studio 2013 or later")
+  endif()
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+  add_compile_options(-std=c++11)
+endif()
+
 if(WIN32)
   add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX)
   set(THREADING threadwin)
-else()
-  add_definitions(-pthread)
+elseif(UNIX)
+  add_compile_options(-pthread)
   set(THREADING thread)
   list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread)
 endif()
diff --git a/Makefile b/Makefile
index fb5d5dc..92fbfc3 100644
--- a/Makefile
+++ b/Makefile
@@ -2,15 +2,20 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 
-# to build against PCRE for testing or benchmarking,
-# uncomment the next two lines
+# To build against ICU for full Unicode properties support,
+# uncomment the next two lines:
+# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU
+# LDICU=$(shell pkg-config icu-uc --libs)
+
+# To build against PCRE for testing or benchmarking,
+# uncomment the next two lines:
 # CCPCRE=-I/usr/local/include -DUSEPCRE
 # LDPCRE=-L/usr/local/lib -lpcre
 
 CXX?=g++
-CXXFLAGS?=-O3 -g -pthread  # can override
-RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCPCRE)  # required
-LDFLAGS?=-pthread
+CXXFLAGS?=-std=c++11 -O3 -g -pthread  # can override
+RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)  # required
+LDFLAGS?=-pthread $(LDICU)
 AR?=ar
 ARFLAGS?=rsc
 NM?=nm
@@ -62,7 +67,6 @@ INSTALL_HFILES=\
 	re2/variadic_function.h\
 
 HFILES=\
-	util/atomicops.h\
 	util/benchmark.h\
 	util/flags.h\
 	util/logging.h\
diff --git a/README b/README
index 2c23660..119f33c 100644
--- a/README
+++ b/README
@@ -10,6 +10,9 @@ make test
 make install
 make testinstall
 
+There is a fair amount of documentation (including code snippets) in
+the re2.h header file.
+
 More information can be found on the wiki:
 https://github.com/google/re2/wiki
 
@@ -23,6 +26,8 @@ Unless otherwise noted, the RE2 source files are distributed
 under the BSD-style license found in the LICENSE file.
 
 RE2's native language is C++.
+
+A C wrapper is at https://github.com/marcomaggi/cre2/.
 An Erlang wrapper is at https://github.com/tuncer/re2/.
 An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
 A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM.
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 5740daa..0cfa480 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -169,13 +169,13 @@ bool BitState::TrySearch(int id0, const char* p0) {
     //         << (p - text_.begin()) << " " << arg;
     Prog::Inst* ip = prog_->inst(id);
     switch (ip->opcode()) {
-      case kInstFail:
-        return false;
-
       default:
         LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
         return false;
 
+      case kInstFail:
+        continue;
+
       case kInstAlt:
         // Cannot just
         //   Push(ip->out1(), p, 0);
@@ -196,7 +196,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
             id = ip->out1();
             goto CheckAndLoop;
         }
-        LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
+        LOG(DFATAL) << "Bad arg in kInstAlt: " << arg;
         continue;
 
       case kInstAltMatch:
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 1f54b9f..7c526c8 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -21,7 +21,6 @@
 //
 // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
 
-#include "util/atomicops.h"
 #include "util/flags.h"
 #include "util/sparse_set.h"
 #include "re2/prog.h"
@@ -102,7 +101,7 @@ class DFA {
     uint flag_;         // Empty string bitfield flags in effect on the way
                         // into this state, along with kFlagMatch if this
                         // is a matching state.
-    State** next_;      // Outgoing arrows from State,
+    std::atomic<State*> next_[1];    // Outgoing arrows from State,
                         // one per input byte class
   };
 
@@ -283,7 +282,7 @@ class DFA {
   struct StartInfo {
     StartInfo() : start(NULL), firstbyte(kFbUnknown) { }
     State* start;
-    volatile int firstbyte;
+    std::atomic<int> firstbyte;
   };
 
   // Fills in params->start and params->firstbyte using
@@ -463,8 +462,9 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
   // At minimum, the search requires room for two states in order
   // to limp along, restarting frequently.  We'll get better performance
   // if there is room for a larger number of states, say 20.
-  int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
-                    (prog_->bytemap_range()+1)*sizeof(State*);
+  int nnext = prog_->bytemap_range() + 1;  // + 1 for kByteEndText slot
+  int64 one_state = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) +
+                    (prog_->size()+nmark)*sizeof(int);
   if (state_budget_ < 20*one_state) {
     LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld",
                               prog_->size(), max_mem);
@@ -734,7 +734,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
     mutex_.AssertHeld();
 
   // Look in the cache for a pre-existing state.
-  State state = { inst, ninst, flag, NULL };
+  State state = {inst, ninst, flag};
   StateSet::iterator it = state_cache_.find(&state);
   if (it != state_cache_.end()) {
     if (DebugDFA)
@@ -748,19 +748,23 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
   // State*, empirically.
   const int kStateCacheOverhead = 32;
   int nnext = prog_->bytemap_range() + 1;  // + 1 for kByteEndText slot
-  int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int);
+  int mem = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) +
+            ninst*sizeof(int);
   if (mem_budget_ < mem + kStateCacheOverhead) {
     mem_budget_ = -1;
     return NULL;
   }
   mem_budget_ -= mem + kStateCacheOverhead;
 
-  // Allocate new state, along with room for next and inst.
+  // Allocate new state along with room for next_ and inst_.
   char* space = new char[mem];
-  State* s = reinterpret_cast<State*>(space);
-  s->next_ = reinterpret_cast<State**>(s + 1);
-  s->inst_ = reinterpret_cast<int*>(s->next_ + nnext);
-  memset(s->next_, 0, nnext*sizeof s->next_[0]);
+  State* s = new (space) State;
+  (void) new (s->next_) std::atomic<State*>[nnext];
+  // Work around a unfortunate bug in older versions of libstdc++.
+  // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
+  for (int i = 0; i < nnext; i++)
+    (void) new (s->next_ + i) std::atomic<State*>(NULL);
+  s->inst_ = new (s->next_ + nnext) int[ninst];
   memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
   s->ninst_ = ninst;
   s->flag_ = flag;
@@ -984,8 +988,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
   }
 
   // If someone else already computed this, return it.
-  State* ns;
-  ATOMIC_LOAD_CONSUME(ns, &state->next_[ByteMap(c)]);
+  State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed);
   if (ns != NULL)
     return ns;
 
@@ -1055,7 +1058,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
   // Write barrier before updating state->next_ so that the
   // main search loop can proceed without any locking, for speed.
   // (Otherwise it would need one mutex operation per input byte.)
-  ATOMIC_STORE_RELEASE(&state->next_[ByteMap(c)], ns);
+  state->next_[ByteMap(c)].store(ns, std::memory_order_release);
   return ns;
 }
 
@@ -1155,7 +1158,7 @@ void DFA::ResetCache(RWLocker* cache_lock) {
   // Clear the cache, reset the memory budget.
   for (int i = 0; i < kMaxStart; i++) {
     start_[i].start = NULL;
-    start_[i].firstbyte = kFbUnknown;
+    start_[i].firstbyte.store(kFbUnknown, std::memory_order_relaxed);
   }
   ClearCache();
   mem_budget_ = state_budget_;
@@ -1376,8 +1379,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
     // Okay to use bytemap[] not ByteMap() here, because
     // c is known to be an actual byte and not kByteEndText.
 
-    State* ns;
-    ATOMIC_LOAD_CONSUME(ns, &s->next_[bytemap[c]]);
+    State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
     if (ns == NULL) {
       ns = RunStateOnByteUnlocked(s, c);
       if (ns == NULL) {
@@ -1464,8 +1466,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
       lastbyte = params->text.begin()[-1] & 0xFF;
   }
 
-  State* ns;
-  ATOMIC_LOAD_CONSUME(ns, &s->next_[ByteMap(lastbyte)]);
+  State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
   if (ns == NULL) {
     ns = RunStateOnByteUnlocked(s, lastbyte);
     if (ns == NULL) {
@@ -1653,16 +1654,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
     }
   }
 
-  if (DebugDFA) {
-    int fb;
-    ATOMIC_LOAD_RELAXED(fb, &info->firstbyte);
+  if (DebugDFA)
     fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
             params->anchored, params->run_forward, flags,
-            DumpState(info->start).c_str(), fb);
-  }
+            DumpState(info->start).c_str(), info->firstbyte.load());
 
   params->start = info->start;
-  ATOMIC_LOAD_ACQUIRE(params->firstbyte, &info->firstbyte);
+  params->firstbyte = info->firstbyte.load(std::memory_order_acquire);
 
   return true;
 }
@@ -1671,13 +1669,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
                               uint flags) {
   // Quick check.
-  int fb;
-  ATOMIC_LOAD_ACQUIRE(fb, &info->firstbyte);
+  int fb = info->firstbyte.load(std::memory_order_acquire);
   if (fb != kFbUnknown)
     return true;
 
   MutexLock l(&mutex_);
-  if (info->firstbyte != kFbUnknown)
+  fb = info->firstbyte.load(std::memory_order_relaxed);
+  if (fb != kFbUnknown)
     return true;
 
   q0_->clear();
@@ -1690,13 +1688,13 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
 
   if (info->start == DeadState) {
     // Synchronize with "quick check" above.
-    ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);
+    info->firstbyte.store(kFbNone, std::memory_order_release);
     return true;
   }
 
   if (info->start == FullMatchState) {
     // Synchronize with "quick check" above.
-    ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);	// will be ignored
+    info->firstbyte.store(kFbNone, std::memory_order_release);  // will be ignored
     return true;
   }
 
@@ -1708,7 +1706,7 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
     State* s = RunStateOnByte(info->start, i);
     if (s == NULL) {
       // Synchronize with "quick check" above.
-      ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
+      info->firstbyte.store(firstbyte, std::memory_order_release);
       return false;
     }
     if (s == info->start)
@@ -1721,8 +1719,9 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
       break;
     }
   }
+
   // Synchronize with "quick check" above.
-  ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
+  info->firstbyte.store(firstbyte, std::memory_order_release);
   return true;
 }
 
@@ -1784,17 +1783,21 @@ bool DFA::Search(const StringPiece& text,
 //
 // This is a separate function so that
 // prog.h can be used without moving the definition of
-// class DFA out of this file.  If you set
-//   prog->dfa_ = dfa;
+// class DFA out of this file.  If you (atomically) set
+//   prog->dfa_first_ = dfa;
+// or
+//   prog->dfa_longest_ = dfa;
 // then you also have to set
 //   prog->delete_dfa_ = DeleteDFA;
-// so that ~Prog can delete the dfa.
-static void DeleteDFA(DFA* dfa) {
-  delete dfa;
+// so that ~Prog can delete the DFA.
+static void DeleteDFA(std::atomic<DFA*>* pdfa) {
+  DFA* dfa = pdfa->load(std::memory_order_relaxed);
+  if (dfa != NULL)
+    delete dfa;
 }
 
 DFA* Prog::GetDFA(MatchKind kind) {
-  DFA*volatile* pdfa;
+  std::atomic<DFA*>* pdfa;
   if (kind == kFirstMatch || kind == kManyMatch) {
     pdfa = &dfa_first_;
   } else {
@@ -1803,13 +1806,12 @@ DFA* Prog::GetDFA(MatchKind kind) {
   }
 
   // Quick check.
-  DFA *dfa;
-  ATOMIC_LOAD_ACQUIRE(dfa, pdfa);
+  DFA* dfa = pdfa->load(std::memory_order_acquire);
   if (dfa != NULL)
     return dfa;
 
   MutexLock l(&dfa_mutex_);
-  dfa = *pdfa;
+  dfa = pdfa->load(std::memory_order_relaxed);
   if (dfa != NULL)
     return dfa;
 
@@ -1828,8 +1830,7 @@ DFA* Prog::GetDFA(MatchKind kind) {
   delete_dfa_ = DeleteDFA;
 
   // Synchronize with "quick check" above.
-  ATOMIC_STORE_RELEASE(pdfa, dfa);
-
+  pdfa->store(dfa, std::memory_order_release);
   return dfa;
 }
 
@@ -2094,19 +2095,9 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
 
 // PossibleMatchRange for a Prog.
 bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) {
-  DFA* dfa = NULL;
-  {
-    MutexLock l(&dfa_mutex_);
-    // Have to use dfa_longest_ to get all strings for full matches.
-    // For example, (a|aa) never matches aa in first-match mode.
-    dfa = dfa_longest_;
-    if (dfa == NULL) {
-      dfa = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
-      ATOMIC_STORE_RELEASE(&dfa_longest_, dfa);
-      delete_dfa_ = DeleteDFA;
-    }
-  }
-  return dfa->PossibleMatchRange(min, max, maxlen);
+  // Have to use dfa_longest_ to get all strings for full matches.
+  // For example, (a|aa) never matches aa in first-match mode.
+  return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
 }
 
 }  // namespace re2
diff --git a/re2/parse.cc b/re2/parse.cc
index f51e589..f55dc8c 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -23,6 +23,12 @@
 #include "re2/unicode_groups.h"
 #include "re2/walker-inl.h"
 
+#if defined(RE2_USE_ICU)
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+#endif
+
 namespace re2 {
 
 // Regular expression parse state.
@@ -1488,11 +1494,6 @@ static const UGroup* LookupGroup(const StringPiece& name,
   return NULL;
 }
 
-// Fake UGroup containing all Runes
-static URange16 any16[] = { { 0, 65535 } };
-static URange32 any32[] = { { 65536, Runemax } };
-static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
-
 // Look for a POSIX group with the given name (e.g., "[:^alpha:]")
 static const UGroup* LookupPosixGroup(const StringPiece& name) {
   return LookupGroup(name, posix_groups, num_posix_groups);
@@ -1502,6 +1503,12 @@ static const UGroup* LookupPerlGroup(const StringPiece& name) {
   return LookupGroup(name, perl_groups, num_perl_groups);
 }
 
+#if !defined(RE2_USE_ICU)
+// Fake UGroup containing all Runes
+static URange16 any16[] = { { 0, 65535 } };
+static URange32 any32[] = { { 65536, Runemax } };
+static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
+
 // Look for a Unicode group with the given name (e.g., "Han")
 static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
   // Special case: "Any" means any.
@@ -1509,6 +1516,7 @@ static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
     return &anygroup;
   return LookupGroup(name, unicode_groups, num_unicode_groups);
 }
+#endif
 
 // Add a UGroup or its negation to the character class.
 static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
@@ -1600,7 +1608,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
   // Committed to parse.  Results:
   int sign = +1;  // -1 = negated char class
   if (c == 'P')
-    sign = -1;
+    sign = -sign;
   StringPiece seq = *s;  // \p{Han} or \pL
   StringPiece name;  // Han or L
   s->remove_prefix(2);  // '\\', 'p'
@@ -1630,11 +1638,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
   // Chop seq where s now begins.
   seq = StringPiece(seq.begin(), static_cast<int>(s->begin() - seq.begin()));
 
-  // Look up group
   if (name.size() > 0 && name[0] == '^') {
     sign = -sign;
     name.remove_prefix(1);  // '^'
   }
+
+#if !defined(RE2_USE_ICU)
+  // Look up the group in the RE2 Unicode data.
   const UGroup *g = LookupUnicodeGroup(name);
   if (g == NULL) {
     status->set_code(kRegexpBadCharRange);
@@ -1643,6 +1653,31 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
   }
 
   AddUGroup(cc, g, sign, parse_flags);
+#else
+  // Look up the group in the ICU Unicode data. Because ICU provides full
+  // Unicode properties support, this could be more than a lookup by name.
+  ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
+      string("\\p{") + name.ToString() + string("}"));
+  UErrorCode uerr = U_ZERO_ERROR;
+  ::icu::UnicodeSet uset(ustr, uerr);
+  if (U_FAILURE(uerr)) {
+    status->set_code(kRegexpBadCharRange);
+    status->set_error_arg(seq);
+    return kParseError;
+  }
+
+  // Convert the UnicodeSet to a URange32 and UGroup that we can add.
+  int nr = uset.getRangeCount();
+  URange32* r = new URange32[nr];
+  for (int i = 0; i < nr; i++) {
+    r[i].lo = uset.getRangeStart(i);
+    r[i].hi = uset.getRangeEnd(i);
+  }
+  UGroup g = {"", +1, 0, 0, r, nr};
+  AddUGroup(cc, &g, sign, parse_flags);
+  delete[] r;
+#endif
+
   return kParseOk;
 }
 
diff --git a/re2/prog.cc b/re2/prog.cc
index 499f560..cb7e6e0 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -113,11 +113,9 @@ Prog::Prog()
 }
 
 Prog::~Prog() {
-  if (delete_dfa_) {
-    if (dfa_first_)
-      delete_dfa_(dfa_first_);
-    if (dfa_longest_)
-      delete_dfa_(dfa_longest_);
+  if (delete_dfa_ != NULL) {
+    delete_dfa_(&dfa_first_);
+    delete_dfa_(&dfa_longest_);
   }
   delete[] onepass_nodes_;
   delete[] inst_;
diff --git a/re2/prog.h b/re2/prog.h
index 8c5b2c4..45e9f0b 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -360,10 +360,10 @@ class Prog {
   Inst* inst_;              // pointer to instruction array
 
   Mutex dfa_mutex_;    // Protects dfa_first_, dfa_longest_
-  DFA* volatile dfa_first_;     // DFA cached for kFirstMatch
-  DFA* volatile dfa_longest_;   // DFA cached for kLongestMatch and kFullMatch
+  std::atomic<DFA*> dfa_first_;     // DFA cached for kFirstMatch
+  std::atomic<DFA*> dfa_longest_;   // DFA cached for kLongestMatch and kFullMatch
   int64 dfa_mem_;      // Maximum memory for DFAs.
-  void (*delete_dfa_)(DFA* dfa);
+  void (*delete_dfa_)(std::atomic<DFA*>* pdfa);
 
   Bitmap<256> byterange_;    // byterange.Get(x) true if x ends a
                              // commonly-treated byte range.
diff --git a/re2/set.h b/re2/set.h
index 1f55b61..e48d8cb 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -31,7 +31,7 @@ class RE2::Set {
 
   // Compile prepares the Set for matching.
   // Add must not be called again after Compile.
-  // Compile must be called before FullMatch or PartialMatch.
+  // Compile must be called before Match.
   // Compile may return false if it runs out of memory.
   bool Compile();
 
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index a872840..2141c53 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -4,7 +4,7 @@
 
 // Tested by search_test.cc, exhaustive_test.cc, tester.cc
 //
-// Prog::BadSearchBacktrack is a backtracking regular expression search,
+// Prog::UnsafeSearchBacktrack is a backtracking regular expression search,
 // except that it remembers where it has been, trading a lot of
 // memory for a lot of time. It exists only for testing purposes.
 //
diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc
index 9e057cc..c06a10c 100644
--- a/re2/testing/exhaustive1_test.cc
+++ b/re2/testing/exhaustive1_test.cc
@@ -33,10 +33,9 @@ TEST(Repetition, Capturing) {
                  7, Explode("ab"), "(?:%s)", "");
 
   // This would be a great test, but it runs forever when PCRE is enabled.
-  if (strstr("PCRE", FLAGS_regexp_engines.c_str()) == NULL)
-    ExhaustiveTest(4, 3, Split(" ", "a (a)"), ops,
-                   100, Explode("a"), "(?:%s)", "");
+  if (!StringPiece(FLAGS_regexp_engines).contains("PCRE"))
+    ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
+                   50, Explode("a"), "(?:%s)", "");
 }
 
 }  // namespace re2
-
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index e201c1e..5a32d92 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -6,7 +6,7 @@
 // TODO: Test extractions for PartialMatch/Consume
 
 #include <errno.h>
-#ifndef _MSC_VER
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
 #include <unistd.h>  /* for sysconf */
 #include <sys/mman.h>
 #endif
@@ -17,8 +17,6 @@
 #include "re2/re2.h"
 #include "re2/regexp.h"
 
-DECLARE_bool(logtostderr);
-
 namespace re2 {
 
 TEST(RE2, HexTests) {
@@ -1540,7 +1538,7 @@ TEST(RE2, Bug18458852) {
 }
 
 TEST(RE2, Bug18523943) {
-  // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL).
+  // Bug in BitState: case kFailInst failed the match entirely.
 
   RE2::Options opt;
   const char a[] = {
@@ -1559,7 +1557,7 @@ TEST(RE2, Bug18523943) {
   RE2 re((const char*)b, opt);
   CHECK(re.ok());
   string s1;
-  CHECK(!RE2::PartialMatch((const char*)a, re, &s1));
+  CHECK(RE2::PartialMatch((const char*)a, re, &s1));
 }
 
 TEST(RE2, Bug21371806) {
@@ -1585,10 +1583,10 @@ TEST(RE2, Bug26356109) {
   string s = "abc";
   StringPiece m;
 
-  CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+  CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::UNANCHORED, &m, 1));
   CHECK_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
 
-  CHECK(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
+  CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::ANCHOR_BOTH, &m, 1));
   CHECK_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
 }
 
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 141bad1..8d80b4c 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -299,9 +299,9 @@ void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search
 // Unambiguous search (RE2 can use OnePass).
 
 void Search_Success_DFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchDFA); }
-void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
 void Search_Success_PCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchPCRE); }
 void Search_Success_RE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchRE2); }
+void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
 
 BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
 #ifdef USEPCRE
@@ -311,9 +311,9 @@ BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
 
 void Search_Success_CachedDFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
-void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
 void Search_Success_CachedPCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
 void Search_Success_CachedRE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
+void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
 
 BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
 #ifdef USEPCRE
@@ -323,28 +323,31 @@ BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs
 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
 
 // Ambiguous search (RE2 cannot use OnePass).
+// Used to be ".*.$", but that is coalesced to ".+$" these days.
 
-void Search_Success1_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchDFA); }
-void Search_Success1_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchPCRE); }
-void Search_Success1_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchRE2); }
-void Search_Success1_BitState(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchBitState); }
+void Search_Success1_DFA(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchDFA); }
+void Search_Success1_PCRE(int i, int n)     { SearchSuccess(i, n, ".*\\C$", SearchPCRE); }
+void Search_Success1_RE2(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchRE2); }
+void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchBitState); }
 
-BENCHMARK_RANGE(Search_Success1_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
 #ifdef USEPCRE
-BENCHMARK_RANGE(Search_Success1_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
 #endif
-BENCHMARK_RANGE(Search_Success1_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
 
-void Search_Success1_Cached_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
-void Search_Success1_Cached_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
-void Search_Success1_Cached_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
+void Search_Success1_Cached_DFA(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); }
+void Search_Success1_Cached_PCRE(int i, int n)     { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); }
+void Search_Success1_Cached_RE2(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); }
+void Search_Success1_Cached_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); }
 
-BENCHMARK_RANGE(Search_Success1_Cached_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
 #ifdef USEPCRE
-BENCHMARK_RANGE(Search_Success1_Cached_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
 #endif
-BENCHMARK_RANGE(Search_Success1_Cached_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
 
 // Benchmark: use regexp to find phone number.
 
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index cb12bad..62cbb6c 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -26,7 +26,7 @@ enum {
   kMaxSubmatch = 1+16,  // $0...$16
 };
 
-const char* engine_types[kEngineMax] = {
+const char* engine_names[kEngineMax] = {
   "Backtrack",
   "NFA",
   "DFA",
@@ -39,18 +39,18 @@ const char* engine_types[kEngineMax] = {
   "PCRE",
 };
 
-// Returns the name string for the type t.
-static string EngineString(Engine t) {
-  if (t < 0 || t >= arraysize(engine_types) || engine_types[t] == NULL) {
-    return StringPrintf("type%d", static_cast<int>(t));
-  }
-  return engine_types[t];
+// Returns the name of the engine.
+static StringPiece EngineName(Engine e) {
+  CHECK_GE(e, 0);
+  CHECK_LT(e, arraysize(engine_names));
+  CHECK(engine_names[e] != NULL);
+  return engine_names[e];
 }
 
 // Returns bit mask of engines to use.
 static uint32 Engines() {
-  static uint32 cached_engines;
-  static bool did_parse;
+  static bool did_parse = false;
+  static uint32 cached_engines = 0;
 
   if (did_parse)
     return cached_engines;
@@ -59,7 +59,7 @@ static uint32 Engines() {
     cached_engines = ~0;
   } else {
     for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
-      if (strstr(EngineString(i).c_str(), FLAGS_regexp_engines.c_str()))
+      if (StringPiece(FLAGS_regexp_engines).contains(EngineName(i)))
         cached_engines |= 1<<i;
   }
 
@@ -69,8 +69,9 @@ static uint32 Engines() {
     cached_engines &= ~(1<<kEnginePCRE);
   for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
     if (cached_engines & (1<<i))
-      LOG(INFO) << EngineString(i) << " enabled";
+      LOG(INFO) << EngineName(i) << " enabled";
   }
+
   did_parse = true;
   return cached_engines;
 }
@@ -566,7 +567,7 @@ void TestInstance::LogMatch(const char* prefix, Engine e,
                             const StringPiece& text, const StringPiece& context,
                             Prog::Anchor anchor) {
   LOG(INFO) << prefix
-    << EngineString(e)
+    << EngineName(e)
     << " regexp "
     << CEscape(regexp_str_)
     << " "
diff --git a/re2/testing/tester.h b/re2/testing/tester.h
index d1e1b22..e9fae85 100644
--- a/re2/testing/tester.h
+++ b/re2/testing/tester.h
@@ -20,7 +20,7 @@ class Regexp;
 
 // All the supported regexp engines.
 enum Engine {
-  kEngineBacktrack = 0,    // Prog::BadSearchBacktrack
+  kEngineBacktrack = 0,    // Prog::UnsafeSearchBacktrack
   kEngineNFA,              // Prog::SearchNFA
   kEngineDFA,              // Prog::SearchDFA, only ask whether it matched
   kEngineDFA1,             // Prog::SearchDFA, ask for match[0]
diff --git a/util/atomicops.h b/util/atomicops.h
deleted file mode 100644
index dc944e7..0000000
--- a/util/atomicops.h
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_UTIL_ATOMICOPS_H__
-#define RE2_UTIL_ATOMICOPS_H__
-
-// The memory ordering constraints resemble the ones in C11.
-// RELAXED - no memory ordering, just an atomic operation.
-// CONSUME - data-dependent ordering.
-// ACQUIRE - prevents memory accesses from hoisting above the operation.
-// RELEASE - prevents memory accesses from sinking below the operation.
-
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#if !defined(OS_NACL) && (__has_builtin(__atomic_load_n) || (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__ >= 40801))
-
-#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_RELAXED); } while (0)
-#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_CONSUME); } while (0)
-#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_ACQUIRE); } while (0)
-#define ATOMIC_STORE_RELAXED(p, v) __atomic_store_n((p), (v), __ATOMIC_RELAXED)
-#define ATOMIC_STORE_RELEASE(p, v) __atomic_store_n((p), (v), __ATOMIC_RELEASE)
-
-#else  // old compiler
-
-#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = *(p); } while (0)
-#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = *(p); MaybeReadMemoryBarrier(); } while (0)
-#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = *(p); ReadMemoryBarrier(); } while (0)
-#define ATOMIC_STORE_RELAXED(p, v) do { *(p) = (v); } while (0)
-#define ATOMIC_STORE_RELEASE(p, v) do { WriteMemoryBarrier(); *(p) = (v); } while (0)
-
-// WriteMemoryBarrier(), ReadMemoryBarrier() and MaybeReadMemoryBarrier()
-// are an implementation detail and must not be used in the rest of the code.
-
-#if defined(__i386__)
-
-static inline void WriteMemoryBarrier() {
-  int x;
-  __asm__ __volatile__("xchgl (%0),%0"  // The lock prefix is implicit for xchg.
-                       :: "r" (&x));
-}
-
-#elif defined(__x86_64__)
-
-// 64-bit implementations of memory barrier can be simpler, because
-// "sfence" is guaranteed to exist.
-static inline void WriteMemoryBarrier() {
-  __asm__ __volatile__("sfence" : : : "memory");
-}
-
-#elif defined(__ppc__) || defined(__powerpc64__)
-
-static inline void WriteMemoryBarrier() {
-  __asm__ __volatile__("lwsync" : : : "memory");
-}
-
-#elif defined(__aarch64__)
-
-static inline void WriteMemoryBarrier() {
-  __asm__ __volatile__("dmb st" : : : "memory");
-}
-
-#elif defined(__alpha__)
-
-static inline void WriteMemoryBarrier() {
-  __asm__ __volatile__("wmb" : : : "memory");
-}
-
-#elif defined(__arm__) && defined(__linux__)
-
-// Linux on ARM puts a suitable memory barrier at a magic address for us to call.
-static inline void WriteMemoryBarrier() {
-  ((void(*)(void))0xffff0fa0)();
-}
-
-#elif defined(__windows__) || defined(_WIN32)
-
-#include <intrin.h>
-#include <windows.h>
-
-static inline void WriteMemoryBarrier() {
-#if defined(_M_IX86) || defined(_M_X64)
-  // x86 and x64 CPUs have a strong memory model that prohibits most types of
-  // reordering, so a non-instruction intrinsic to suppress compiler reordering
-  // is sufficient. _WriteBarrier is deprecated, but is still appropriate for
-  // the "old compiler" path (pre C++11).
-  _WriteBarrier();
-#else
-  LONG x;
-  ::InterlockedExchange(&x, 0);
-#endif
-}
-
-#elif defined(OS_NACL)
-
-static inline void WriteMemoryBarrier() {
-  __sync_synchronize();
-}
-
-#elif defined(__mips__)
-
-static inline void WriteMemoryBarrier() {
-  __asm__ __volatile__("sync" : : : "memory");
-}
-
-#else
-
-#include "util/mutex.h"
-
-static inline void WriteMemoryBarrier() {
-  // Slight overkill, but good enough:
-  // any mutex implementation must have
-  // a read barrier after the lock operation and
-  // a write barrier before the unlock operation.
-  //
-  // It may be worthwhile to write architecture-specific
-  // barriers for the common platforms, as above, but
-  // this is a correct fallback.
-  re2::Mutex mu;
-  re2::MutexLock l(&mu);
-}
-
-#endif
-
-// Alpha has very weak memory ordering. If relying on WriteBarriers, one must
-// use read barriers for the readers too.
-#if defined(__alpha__)
-
-static inline void MaybeReadMemoryBarrier() {
-  __asm__ __volatile__("mb" : : : "memory");
-}
-
-#else
-
-static inline void MaybeReadMemoryBarrier() {}
-
-#endif  // __alpha__
-
-// Read barrier for various targets.
-
-#if defined(__ppc__) || defined(__powerpc64__)
-
-static inline void ReadMemoryBarrier() {
-  __asm__ __volatile__("lwsync" : : : "memory");
-}
-
-#elif defined(__aarch64__)
-
-static inline void ReadMemoryBarrier() {
-  __asm__ __volatile__("dmb ld" : : : "memory");
-}
-
-#elif defined(__alpha__)
-
-static inline void ReadMemoryBarrier() {
-  __asm__ __volatile__("mb" : : : "memory");
-}
-
-#elif defined(__mips__)
-
-static inline void ReadMemoryBarrier() {
-  __asm__ __volatile__("sync" : : : "memory");
-}
-
-#else
-
-static inline void ReadMemoryBarrier() {}
-
-#endif
-
-#endif  // old compiler
-
-#ifndef NO_THREAD_SAFETY_ANALYSIS
-#define NO_THREAD_SAFETY_ANALYSIS
-#endif
-
-#endif  // RE2_UTIL_ATOMICOPS_H__
diff --git a/util/util.h b/util/util.h
index c59d91f..8c5d922 100644
--- a/util/util.h
+++ b/util/util.h
@@ -30,6 +30,7 @@
 #include <ostream>
 #include <utility>
 #include <set>
+#include <atomic>
 
 // Use std names.
 using std::set;
@@ -113,6 +114,10 @@ template<bool> struct CompileAssert {};
 
 #define arraysize(array) (int)(sizeof(array)/sizeof((array)[0]))
 
+#ifndef NO_THREAD_SAFETY_ANALYSIS
+#define NO_THREAD_SAFETY_ANALYSIS
+#endif
+
 class StringPiece;
 
 string CEscape(const StringPiece& src);
author	DongHun Kwak <dh0128.kwak@samsung.com>	2016-11-21 16:57:18 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2016-11-21 16:57:18 +0900
commit	4dfe393027cfcfe844a8c8b97209a714904308c4 (patch)
tree	d94a05252de9becb345ed10d7c3cfed598005923
parent	485249b5a02cf59571cde61f83d10a6a9ec36b3d (diff)
download	re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.gz re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.bz2 re2-4dfe393027cfcfe844a8c8b97209a714904308c4.zip