summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2016-11-21 16:57:18 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2016-11-21 16:57:18 +0900
commit4dfe393027cfcfe844a8c8b97209a714904308c4 (patch)
treed94a05252de9becb345ed10d7c3cfed598005923
parent485249b5a02cf59571cde61f83d10a6a9ec36b3d (diff)
downloadre2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.gz
re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.bz2
re2-4dfe393027cfcfe844a8c8b97209a714904308c4.zip
Imported Upstream version 20160301upstream/20160301
Change-Id: I33831129b7d3a860e34c551b415ede10da350c73 Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r--BUILD13
-rw-r--r--CMakeLists.txt12
-rw-r--r--Makefile16
-rw-r--r--README5
-rw-r--r--re2/bitstate.cc8
-rw-r--r--re2/dfa.cc105
-rw-r--r--re2/parse.cc49
-rw-r--r--re2/prog.cc8
-rw-r--r--re2/prog.h6
-rw-r--r--re2/set.h2
-rw-r--r--re2/testing/backtrack.cc2
-rw-r--r--re2/testing/exhaustive1_test.cc7
-rw-r--r--re2/testing/re2_test.cc12
-rw-r--r--re2/testing/regexp_benchmark.cc33
-rw-r--r--re2/testing/tester.cc25
-rw-r--r--re2/testing/tester.h2
-rw-r--r--util/atomicops.h179
-rw-r--r--util/util.h5
18 files changed, 178 insertions, 311 deletions
diff --git a/BUILD b/BUILD
index 2fb0d22..2636631 100644
--- a/BUILD
+++ b/BUILD
@@ -36,7 +36,6 @@ cc_library(
"re2/unicode_groups.cc",
"re2/unicode_groups.h",
"re2/walker-inl.h",
- "util/atomicops.h",
"util/flags.h",
"util/hash.cc",
"util/logging.cc",
@@ -103,8 +102,8 @@ re2_test("filtered_re2_test")
re2_test("mimics_pcre_test")
re2_test("parse_test")
re2_test("possible_match_test")
-re2_test("re2_test")
re2_test("re2_arg_test")
+re2_test("re2_test")
re2_test("regexp_test")
re2_test("required_prefix_test")
re2_test("search_test")
@@ -112,11 +111,11 @@ re2_test("set_test")
re2_test("simplify_test")
re2_test("string_generator_test")
-re2_test("dfa_test")
-re2_test("exhaustive1_test")
-re2_test("exhaustive2_test")
-re2_test("exhaustive3_test")
-re2_test("exhaustive_test")
+re2_test("dfa_test", size="large")
+re2_test("exhaustive1_test", size="large")
+re2_test("exhaustive2_test", size="large")
+re2_test("exhaustive3_test", size="large")
+re2_test("exhaustive_test", size="large")
re2_test("random_test", size="large")
# TODO: Add support for regexp_benchmark.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c980df..3ed718f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,11 +11,19 @@ option(USEPCRE "use PCRE in tests and benchmarks" OFF)
set(EXTRA_TARGET_LINK_LIBRARIES)
+if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+ if(MSVC_VERSION LESS 1800)
+ message(FATAL_ERROR "you need Visual Studio 2013 or later")
+ endif()
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+ add_compile_options(-std=c++11)
+endif()
+
if(WIN32)
add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX)
set(THREADING threadwin)
-else()
- add_definitions(-pthread)
+elseif(UNIX)
+ add_compile_options(-pthread)
set(THREADING thread)
list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread)
endif()
diff --git a/Makefile b/Makefile
index fb5d5dc..92fbfc3 100644
--- a/Makefile
+++ b/Makefile
@@ -2,15 +2,20 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
-# to build against PCRE for testing or benchmarking,
-# uncomment the next two lines
+# To build against ICU for full Unicode properties support,
+# uncomment the next two lines:
+# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU
+# LDICU=$(shell pkg-config icu-uc --libs)
+
+# To build against PCRE for testing or benchmarking,
+# uncomment the next two lines:
# CCPCRE=-I/usr/local/include -DUSEPCRE
# LDPCRE=-L/usr/local/lib -lpcre
CXX?=g++
-CXXFLAGS?=-O3 -g -pthread # can override
-RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCPCRE) # required
-LDFLAGS?=-pthread
+CXXFLAGS?=-std=c++11 -O3 -g -pthread # can override
+RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE) # required
+LDFLAGS?=-pthread $(LDICU)
AR?=ar
ARFLAGS?=rsc
NM?=nm
@@ -62,7 +67,6 @@ INSTALL_HFILES=\
re2/variadic_function.h\
HFILES=\
- util/atomicops.h\
util/benchmark.h\
util/flags.h\
util/logging.h\
diff --git a/README b/README
index 2c23660..119f33c 100644
--- a/README
+++ b/README
@@ -10,6 +10,9 @@ make test
make install
make testinstall
+There is a fair amount of documentation (including code snippets) in
+the re2.h header file.
+
More information can be found on the wiki:
https://github.com/google/re2/wiki
@@ -23,6 +26,8 @@ Unless otherwise noted, the RE2 source files are distributed
under the BSD-style license found in the LICENSE file.
RE2's native language is C++.
+
+A C wrapper is at https://github.com/marcomaggi/cre2/.
An Erlang wrapper is at https://github.com/tuncer/re2/.
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM.
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 5740daa..0cfa480 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -169,13 +169,13 @@ bool BitState::TrySearch(int id0, const char* p0) {
// << (p - text_.begin()) << " " << arg;
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
- case kInstFail:
- return false;
-
default:
LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
return false;
+ case kInstFail:
+ continue;
+
case kInstAlt:
// Cannot just
// Push(ip->out1(), p, 0);
@@ -196,7 +196,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
id = ip->out1();
goto CheckAndLoop;
}
- LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
+ LOG(DFATAL) << "Bad arg in kInstAlt: " << arg;
continue;
case kInstAltMatch:
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 1f54b9f..7c526c8 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -21,7 +21,6 @@
//
// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
-#include "util/atomicops.h"
#include "util/flags.h"
#include "util/sparse_set.h"
#include "re2/prog.h"
@@ -102,7 +101,7 @@ class DFA {
uint flag_; // Empty string bitfield flags in effect on the way
// into this state, along with kFlagMatch if this
// is a matching state.
- State** next_; // Outgoing arrows from State,
+ std::atomic<State*> next_[1]; // Outgoing arrows from State,
// one per input byte class
};
@@ -283,7 +282,7 @@ class DFA {
struct StartInfo {
StartInfo() : start(NULL), firstbyte(kFbUnknown) { }
State* start;
- volatile int firstbyte;
+ std::atomic<int> firstbyte;
};
// Fills in params->start and params->firstbyte using
@@ -463,8 +462,9 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
// At minimum, the search requires room for two states in order
// to limp along, restarting frequently. We'll get better performance
// if there is room for a larger number of states, say 20.
- int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
- (prog_->bytemap_range()+1)*sizeof(State*);
+ int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
+ int64 one_state = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) +
+ (prog_->size()+nmark)*sizeof(int);
if (state_budget_ < 20*one_state) {
LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld",
prog_->size(), max_mem);
@@ -734,7 +734,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
mutex_.AssertHeld();
// Look in the cache for a pre-existing state.
- State state = { inst, ninst, flag, NULL };
+ State state = {inst, ninst, flag};
StateSet::iterator it = state_cache_.find(&state);
if (it != state_cache_.end()) {
if (DebugDFA)
@@ -748,19 +748,23 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
// State*, empirically.
const int kStateCacheOverhead = 32;
int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int);
+ int mem = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) +
+ ninst*sizeof(int);
if (mem_budget_ < mem + kStateCacheOverhead) {
mem_budget_ = -1;
return NULL;
}
mem_budget_ -= mem + kStateCacheOverhead;
- // Allocate new state, along with room for next and inst.
+ // Allocate new state along with room for next_ and inst_.
char* space = new char[mem];
- State* s = reinterpret_cast<State*>(space);
- s->next_ = reinterpret_cast<State**>(s + 1);
- s->inst_ = reinterpret_cast<int*>(s->next_ + nnext);
- memset(s->next_, 0, nnext*sizeof s->next_[0]);
+ State* s = new (space) State;
+ (void) new (s->next_) std::atomic<State*>[nnext];
+ // Work around a unfortunate bug in older versions of libstdc++.
+ // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
+ for (int i = 0; i < nnext; i++)
+ (void) new (s->next_ + i) std::atomic<State*>(NULL);
+ s->inst_ = new (s->next_ + nnext) int[ninst];
memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
s->ninst_ = ninst;
s->flag_ = flag;
@@ -984,8 +988,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
}
// If someone else already computed this, return it.
- State* ns;
- ATOMIC_LOAD_CONSUME(ns, &state->next_[ByteMap(c)]);
+ State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed);
if (ns != NULL)
return ns;
@@ -1055,7 +1058,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
// Write barrier before updating state->next_ so that the
// main search loop can proceed without any locking, for speed.
// (Otherwise it would need one mutex operation per input byte.)
- ATOMIC_STORE_RELEASE(&state->next_[ByteMap(c)], ns);
+ state->next_[ByteMap(c)].store(ns, std::memory_order_release);
return ns;
}
@@ -1155,7 +1158,7 @@ void DFA::ResetCache(RWLocker* cache_lock) {
// Clear the cache, reset the memory budget.
for (int i = 0; i < kMaxStart; i++) {
start_[i].start = NULL;
- start_[i].firstbyte = kFbUnknown;
+ start_[i].firstbyte.store(kFbUnknown, std::memory_order_relaxed);
}
ClearCache();
mem_budget_ = state_budget_;
@@ -1376,8 +1379,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
// Okay to use bytemap[] not ByteMap() here, because
// c is known to be an actual byte and not kByteEndText.
- State* ns;
- ATOMIC_LOAD_CONSUME(ns, &s->next_[bytemap[c]]);
+ State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
if (ns == NULL) {
ns = RunStateOnByteUnlocked(s, c);
if (ns == NULL) {
@@ -1464,8 +1466,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
lastbyte = params->text.begin()[-1] & 0xFF;
}
- State* ns;
- ATOMIC_LOAD_CONSUME(ns, &s->next_[ByteMap(lastbyte)]);
+ State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
if (ns == NULL) {
ns = RunStateOnByteUnlocked(s, lastbyte);
if (ns == NULL) {
@@ -1653,16 +1654,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
}
}
- if (DebugDFA) {
- int fb;
- ATOMIC_LOAD_RELAXED(fb, &info->firstbyte);
+ if (DebugDFA)
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
params->anchored, params->run_forward, flags,
- DumpState(info->start).c_str(), fb);
- }
+ DumpState(info->start).c_str(), info->firstbyte.load());
params->start = info->start;
- ATOMIC_LOAD_ACQUIRE(params->firstbyte, &info->firstbyte);
+ params->firstbyte = info->firstbyte.load(std::memory_order_acquire);
return true;
}
@@ -1671,13 +1669,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
uint flags) {
// Quick check.
- int fb;
- ATOMIC_LOAD_ACQUIRE(fb, &info->firstbyte);
+ int fb = info->firstbyte.load(std::memory_order_acquire);
if (fb != kFbUnknown)
return true;
MutexLock l(&mutex_);
- if (info->firstbyte != kFbUnknown)
+ fb = info->firstbyte.load(std::memory_order_relaxed);
+ if (fb != kFbUnknown)
return true;
q0_->clear();
@@ -1690,13 +1688,13 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
if (info->start == DeadState) {
// Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);
+ info->firstbyte.store(kFbNone, std::memory_order_release);
return true;
}
if (info->start == FullMatchState) {
// Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone); // will be ignored
+ info->firstbyte.store(kFbNone, std::memory_order_release); // will be ignored
return true;
}
@@ -1708,7 +1706,7 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
State* s = RunStateOnByte(info->start, i);
if (s == NULL) {
// Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
+ info->firstbyte.store(firstbyte, std::memory_order_release);
return false;
}
if (s == info->start)
@@ -1721,8 +1719,9 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
break;
}
}
+
// Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
+ info->firstbyte.store(firstbyte, std::memory_order_release);
return true;
}
@@ -1784,17 +1783,21 @@ bool DFA::Search(const StringPiece& text,
//
// This is a separate function so that
// prog.h can be used without moving the definition of
-// class DFA out of this file. If you set
-// prog->dfa_ = dfa;
+// class DFA out of this file. If you (atomically) set
+// prog->dfa_first_ = dfa;
+// or
+// prog->dfa_longest_ = dfa;
// then you also have to set
// prog->delete_dfa_ = DeleteDFA;
-// so that ~Prog can delete the dfa.
-static void DeleteDFA(DFA* dfa) {
- delete dfa;
+// so that ~Prog can delete the DFA.
+static void DeleteDFA(std::atomic<DFA*>* pdfa) {
+ DFA* dfa = pdfa->load(std::memory_order_relaxed);
+ if (dfa != NULL)
+ delete dfa;
}
DFA* Prog::GetDFA(MatchKind kind) {
- DFA*volatile* pdfa;
+ std::atomic<DFA*>* pdfa;
if (kind == kFirstMatch || kind == kManyMatch) {
pdfa = &dfa_first_;
} else {
@@ -1803,13 +1806,12 @@ DFA* Prog::GetDFA(MatchKind kind) {
}
// Quick check.
- DFA *dfa;
- ATOMIC_LOAD_ACQUIRE(dfa, pdfa);
+ DFA* dfa = pdfa->load(std::memory_order_acquire);
if (dfa != NULL)
return dfa;
MutexLock l(&dfa_mutex_);
- dfa = *pdfa;
+ dfa = pdfa->load(std::memory_order_relaxed);
if (dfa != NULL)
return dfa;
@@ -1828,8 +1830,7 @@ DFA* Prog::GetDFA(MatchKind kind) {
delete_dfa_ = DeleteDFA;
// Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(pdfa, dfa);
-
+ pdfa->store(dfa, std::memory_order_release);
return dfa;
}
@@ -2094,19 +2095,9 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
// PossibleMatchRange for a Prog.
bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) {
- DFA* dfa = NULL;
- {
- MutexLock l(&dfa_mutex_);
- // Have to use dfa_longest_ to get all strings for full matches.
- // For example, (a|aa) never matches aa in first-match mode.
- dfa = dfa_longest_;
- if (dfa == NULL) {
- dfa = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
- ATOMIC_STORE_RELEASE(&dfa_longest_, dfa);
- delete_dfa_ = DeleteDFA;
- }
- }
- return dfa->PossibleMatchRange(min, max, maxlen);
+ // Have to use dfa_longest_ to get all strings for full matches.
+ // For example, (a|aa) never matches aa in first-match mode.
+ return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
}
} // namespace re2
diff --git a/re2/parse.cc b/re2/parse.cc
index f51e589..f55dc8c 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -23,6 +23,12 @@
#include "re2/unicode_groups.h"
#include "re2/walker-inl.h"
+#if defined(RE2_USE_ICU)
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+#endif
+
namespace re2 {
// Regular expression parse state.
@@ -1488,11 +1494,6 @@ static const UGroup* LookupGroup(const StringPiece& name,
return NULL;
}
-// Fake UGroup containing all Runes
-static URange16 any16[] = { { 0, 65535 } };
-static URange32 any32[] = { { 65536, Runemax } };
-static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
-
// Look for a POSIX group with the given name (e.g., "[:^alpha:]")
static const UGroup* LookupPosixGroup(const StringPiece& name) {
return LookupGroup(name, posix_groups, num_posix_groups);
@@ -1502,6 +1503,12 @@ static const UGroup* LookupPerlGroup(const StringPiece& name) {
return LookupGroup(name, perl_groups, num_perl_groups);
}
+#if !defined(RE2_USE_ICU)
+// Fake UGroup containing all Runes
+static URange16 any16[] = { { 0, 65535 } };
+static URange32 any32[] = { { 65536, Runemax } };
+static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
+
// Look for a Unicode group with the given name (e.g., "Han")
static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
// Special case: "Any" means any.
@@ -1509,6 +1516,7 @@ static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
return &anygroup;
return LookupGroup(name, unicode_groups, num_unicode_groups);
}
+#endif
// Add a UGroup or its negation to the character class.
static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
@@ -1600,7 +1608,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Committed to parse. Results:
int sign = +1; // -1 = negated char class
if (c == 'P')
- sign = -1;
+ sign = -sign;
StringPiece seq = *s; // \p{Han} or \pL
StringPiece name; // Han or L
s->remove_prefix(2); // '\\', 'p'
@@ -1630,11 +1638,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Chop seq where s now begins.
seq = StringPiece(seq.begin(), static_cast<int>(s->begin() - seq.begin()));
- // Look up group
if (name.size() > 0 && name[0] == '^') {
sign = -sign;
name.remove_prefix(1); // '^'
}
+
+#if !defined(RE2_USE_ICU)
+ // Look up the group in the RE2 Unicode data.
const UGroup *g = LookupUnicodeGroup(name);
if (g == NULL) {
status->set_code(kRegexpBadCharRange);
@@ -1643,6 +1653,31 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
}
AddUGroup(cc, g, sign, parse_flags);
+#else
+ // Look up the group in the ICU Unicode data. Because ICU provides full
+ // Unicode properties support, this could be more than a lookup by name.
+ ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
+ string("\\p{") + name.ToString() + string("}"));
+ UErrorCode uerr = U_ZERO_ERROR;
+ ::icu::UnicodeSet uset(ustr, uerr);
+ if (U_FAILURE(uerr)) {
+ status->set_code(kRegexpBadCharRange);
+ status->set_error_arg(seq);
+ return kParseError;
+ }
+
+ // Convert the UnicodeSet to a URange32 and UGroup that we can add.
+ int nr = uset.getRangeCount();
+ URange32* r = new URange32[nr];
+ for (int i = 0; i < nr; i++) {
+ r[i].lo = uset.getRangeStart(i);
+ r[i].hi = uset.getRangeEnd(i);
+ }
+ UGroup g = {"", +1, 0, 0, r, nr};
+ AddUGroup(cc, &g, sign, parse_flags);
+ delete[] r;
+#endif
+
return kParseOk;
}
diff --git a/re2/prog.cc b/re2/prog.cc
index 499f560..cb7e6e0 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -113,11 +113,9 @@ Prog::Prog()
}
Prog::~Prog() {
- if (delete_dfa_) {
- if (dfa_first_)
- delete_dfa_(dfa_first_);
- if (dfa_longest_)
- delete_dfa_(dfa_longest_);
+ if (delete_dfa_ != NULL) {
+ delete_dfa_(&dfa_first_);
+ delete_dfa_(&dfa_longest_);
}
delete[] onepass_nodes_;
delete[] inst_;
diff --git a/re2/prog.h b/re2/prog.h
index 8c5b2c4..45e9f0b 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -360,10 +360,10 @@ class Prog {
Inst* inst_; // pointer to instruction array
Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_
- DFA* volatile dfa_first_; // DFA cached for kFirstMatch
- DFA* volatile dfa_longest_; // DFA cached for kLongestMatch and kFullMatch
+ std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch
+ std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch and kFullMatch
int64 dfa_mem_; // Maximum memory for DFAs.
- void (*delete_dfa_)(DFA* dfa);
+ void (*delete_dfa_)(std::atomic<DFA*>* pdfa);
Bitmap<256> byterange_; // byterange.Get(x) true if x ends a
// commonly-treated byte range.
diff --git a/re2/set.h b/re2/set.h
index 1f55b61..e48d8cb 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -31,7 +31,7 @@ class RE2::Set {
// Compile prepares the Set for matching.
// Add must not be called again after Compile.
- // Compile must be called before FullMatch or PartialMatch.
+ // Compile must be called before Match.
// Compile may return false if it runs out of memory.
bool Compile();
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index a872840..2141c53 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -4,7 +4,7 @@
// Tested by search_test.cc, exhaustive_test.cc, tester.cc
//
-// Prog::BadSearchBacktrack is a backtracking regular expression search,
+// Prog::UnsafeSearchBacktrack is a backtracking regular expression search,
// except that it remembers where it has been, trading a lot of
// memory for a lot of time. It exists only for testing purposes.
//
diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc
index 9e057cc..c06a10c 100644
--- a/re2/testing/exhaustive1_test.cc
+++ b/re2/testing/exhaustive1_test.cc
@@ -33,10 +33,9 @@ TEST(Repetition, Capturing) {
7, Explode("ab"), "(?:%s)", "");
// This would be a great test, but it runs forever when PCRE is enabled.
- if (strstr("PCRE", FLAGS_regexp_engines.c_str()) == NULL)
- ExhaustiveTest(4, 3, Split(" ", "a (a)"), ops,
- 100, Explode("a"), "(?:%s)", "");
+ if (!StringPiece(FLAGS_regexp_engines).contains("PCRE"))
+ ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
+ 50, Explode("a"), "(?:%s)", "");
}
} // namespace re2
-
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index e201c1e..5a32d92 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -6,7 +6,7 @@
// TODO: Test extractions for PartialMatch/Consume
#include <errno.h>
-#ifndef _MSC_VER
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h> /* for sysconf */
#include <sys/mman.h>
#endif
@@ -17,8 +17,6 @@
#include "re2/re2.h"
#include "re2/regexp.h"
-DECLARE_bool(logtostderr);
-
namespace re2 {
TEST(RE2, HexTests) {
@@ -1540,7 +1538,7 @@ TEST(RE2, Bug18458852) {
}
TEST(RE2, Bug18523943) {
- // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL).
+ // Bug in BitState: case kFailInst failed the match entirely.
RE2::Options opt;
const char a[] = {
@@ -1559,7 +1557,7 @@ TEST(RE2, Bug18523943) {
RE2 re((const char*)b, opt);
CHECK(re.ok());
string s1;
- CHECK(!RE2::PartialMatch((const char*)a, re, &s1));
+ CHECK(RE2::PartialMatch((const char*)a, re, &s1));
}
TEST(RE2, Bug21371806) {
@@ -1585,10 +1583,10 @@ TEST(RE2, Bug26356109) {
string s = "abc";
StringPiece m;
- CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+ CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::UNANCHORED, &m, 1));
CHECK_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
- CHECK(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
+ CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::ANCHOR_BOTH, &m, 1));
CHECK_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
}
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 141bad1..8d80b4c 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -299,9 +299,9 @@ void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search
// Unambiguous search (RE2 can use OnePass).
void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); }
-void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); }
void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); }
+void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
#ifdef USEPCRE
@@ -311,9 +311,9 @@ BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
-void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
+void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
#ifdef USEPCRE
@@ -323,28 +323,31 @@ BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs
BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
// Ambiguous search (RE2 cannot use OnePass).
+// Used to be ".*.$", but that is coalesced to ".+$" these days.
-void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchDFA); }
-void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchPCRE); }
-void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchRE2); }
-void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", SearchBitState); }
+void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchDFA); }
+void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchPCRE); }
+void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchRE2); }
+void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchBitState); }
-BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
#ifdef USEPCRE
-BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
#endif
-BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
-void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
-void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
-void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
+void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); }
+void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); }
+void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); }
+void Search_Success1_Cached_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); }
-BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
#ifdef USEPCRE
-BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
#endif
-BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_Cached_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
// Benchmark: use regexp to find phone number.
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index cb12bad..62cbb6c 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -26,7 +26,7 @@ enum {
kMaxSubmatch = 1+16, // $0...$16
};
-const char* engine_types[kEngineMax] = {
+const char* engine_names[kEngineMax] = {
"Backtrack",
"NFA",
"DFA",
@@ -39,18 +39,18 @@ const char* engine_types[kEngineMax] = {
"PCRE",
};
-// Returns the name string for the type t.
-static string EngineString(Engine t) {
- if (t < 0 || t >= arraysize(engine_types) || engine_types[t] == NULL) {
- return StringPrintf("type%d", static_cast<int>(t));
- }
- return engine_types[t];
+// Returns the name of the engine.
+static StringPiece EngineName(Engine e) {
+ CHECK_GE(e, 0);
+ CHECK_LT(e, arraysize(engine_names));
+ CHECK(engine_names[e] != NULL);
+ return engine_names[e];
}
// Returns bit mask of engines to use.
static uint32 Engines() {
- static uint32 cached_engines;
- static bool did_parse;
+ static bool did_parse = false;
+ static uint32 cached_engines = 0;
if (did_parse)
return cached_engines;
@@ -59,7 +59,7 @@ static uint32 Engines() {
cached_engines = ~0;
} else {
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
- if (strstr(EngineString(i).c_str(), FLAGS_regexp_engines.c_str()))
+ if (StringPiece(FLAGS_regexp_engines).contains(EngineName(i)))
cached_engines |= 1<<i;
}
@@ -69,8 +69,9 @@ static uint32 Engines() {
cached_engines &= ~(1<<kEnginePCRE);
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
if (cached_engines & (1<<i))
- LOG(INFO) << EngineString(i) << " enabled";
+ LOG(INFO) << EngineName(i) << " enabled";
}
+
did_parse = true;
return cached_engines;
}
@@ -566,7 +567,7 @@ void TestInstance::LogMatch(const char* prefix, Engine e,
const StringPiece& text, const StringPiece& context,
Prog::Anchor anchor) {
LOG(INFO) << prefix
- << EngineString(e)
+ << EngineName(e)
<< " regexp "
<< CEscape(regexp_str_)
<< " "
diff --git a/re2/testing/tester.h b/re2/testing/tester.h
index d1e1b22..e9fae85 100644
--- a/re2/testing/tester.h
+++ b/re2/testing/tester.h
@@ -20,7 +20,7 @@ class Regexp;
// All the supported regexp engines.
enum Engine {
- kEngineBacktrack = 0, // Prog::BadSearchBacktrack
+ kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack
kEngineNFA, // Prog::SearchNFA
kEngineDFA, // Prog::SearchDFA, only ask whether it matched
kEngineDFA1, // Prog::SearchDFA, ask for match[0]
diff --git a/util/atomicops.h b/util/atomicops.h
deleted file mode 100644
index dc944e7..0000000
--- a/util/atomicops.h
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_UTIL_ATOMICOPS_H__
-#define RE2_UTIL_ATOMICOPS_H__
-
-// The memory ordering constraints resemble the ones in C11.
-// RELAXED - no memory ordering, just an atomic operation.
-// CONSUME - data-dependent ordering.
-// ACQUIRE - prevents memory accesses from hoisting above the operation.
-// RELEASE - prevents memory accesses from sinking below the operation.
-
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#if !defined(OS_NACL) && (__has_builtin(__atomic_load_n) || (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__ >= 40801))
-
-#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_RELAXED); } while (0)
-#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_CONSUME); } while (0)
-#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_ACQUIRE); } while (0)
-#define ATOMIC_STORE_RELAXED(p, v) __atomic_store_n((p), (v), __ATOMIC_RELAXED)
-#define ATOMIC_STORE_RELEASE(p, v) __atomic_store_n((p), (v), __ATOMIC_RELEASE)
-
-#else // old compiler
-
-#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = *(p); } while (0)
-#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = *(p); MaybeReadMemoryBarrier(); } while (0)
-#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = *(p); ReadMemoryBarrier(); } while (0)
-#define ATOMIC_STORE_RELAXED(p, v) do { *(p) = (v); } while (0)
-#define ATOMIC_STORE_RELEASE(p, v) do { WriteMemoryBarrier(); *(p) = (v); } while (0)
-
-// WriteMemoryBarrier(), ReadMemoryBarrier() and MaybeReadMemoryBarrier()
-// are an implementation detail and must not be used in the rest of the code.
-
-#if defined(__i386__)
-
-static inline void WriteMemoryBarrier() {
- int x;
- __asm__ __volatile__("xchgl (%0),%0" // The lock prefix is implicit for xchg.
- :: "r" (&x));
-}
-
-#elif defined(__x86_64__)
-
-// 64-bit implementations of memory barrier can be simpler, because
-// "sfence" is guaranteed to exist.
-static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("sfence" : : : "memory");
-}
-
-#elif defined(__ppc__) || defined(__powerpc64__)
-
-static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("lwsync" : : : "memory");
-}
-
-#elif defined(__aarch64__)
-
-static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("dmb st" : : : "memory");
-}
-
-#elif defined(__alpha__)
-
-static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("wmb" : : : "memory");
-}
-
-#elif defined(__arm__) && defined(__linux__)
-
-// Linux on ARM puts a suitable memory barrier at a magic address for us to call.
-static inline void WriteMemoryBarrier() {
- ((void(*)(void))0xffff0fa0)();
-}
-
-#elif defined(__windows__) || defined(_WIN32)
-
-#include <intrin.h>
-#include <windows.h>
-
-static inline void WriteMemoryBarrier() {
-#if defined(_M_IX86) || defined(_M_X64)
- // x86 and x64 CPUs have a strong memory model that prohibits most types of
- // reordering, so a non-instruction intrinsic to suppress compiler reordering
- // is sufficient. _WriteBarrier is deprecated, but is still appropriate for
- // the "old compiler" path (pre C++11).
- _WriteBarrier();
-#else
- LONG x;
- ::InterlockedExchange(&x, 0);
-#endif
-}
-
-#elif defined(OS_NACL)
-
-static inline void WriteMemoryBarrier() {
- __sync_synchronize();
-}
-
-#elif defined(__mips__)
-
-static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("sync" : : : "memory");
-}
-
-#else
-
-#include "util/mutex.h"
-
-static inline void WriteMemoryBarrier() {
- // Slight overkill, but good enough:
- // any mutex implementation must have
- // a read barrier after the lock operation and
- // a write barrier before the unlock operation.
- //
- // It may be worthwhile to write architecture-specific
- // barriers for the common platforms, as above, but
- // this is a correct fallback.
- re2::Mutex mu;
- re2::MutexLock l(&mu);
-}
-
-#endif
-
-// Alpha has very weak memory ordering. If relying on WriteBarriers, one must
-// use read barriers for the readers too.
-#if defined(__alpha__)
-
-static inline void MaybeReadMemoryBarrier() {
- __asm__ __volatile__("mb" : : : "memory");
-}
-
-#else
-
-static inline void MaybeReadMemoryBarrier() {}
-
-#endif // __alpha__
-
-// Read barrier for various targets.
-
-#if defined(__ppc__) || defined(__powerpc64__)
-
-static inline void ReadMemoryBarrier() {
- __asm__ __volatile__("lwsync" : : : "memory");
-}
-
-#elif defined(__aarch64__)
-
-static inline void ReadMemoryBarrier() {
- __asm__ __volatile__("dmb ld" : : : "memory");
-}
-
-#elif defined(__alpha__)
-
-static inline void ReadMemoryBarrier() {
- __asm__ __volatile__("mb" : : : "memory");
-}
-
-#elif defined(__mips__)
-
-static inline void ReadMemoryBarrier() {
- __asm__ __volatile__("sync" : : : "memory");
-}
-
-#else
-
-static inline void ReadMemoryBarrier() {}
-
-#endif
-
-#endif // old compiler
-
-#ifndef NO_THREAD_SAFETY_ANALYSIS
-#define NO_THREAD_SAFETY_ANALYSIS
-#endif
-
-#endif // RE2_UTIL_ATOMICOPS_H__
diff --git a/util/util.h b/util/util.h
index c59d91f..8c5d922 100644
--- a/util/util.h
+++ b/util/util.h
@@ -30,6 +30,7 @@
#include <ostream>
#include <utility>
#include <set>
+#include <atomic>
// Use std names.
using std::set;
@@ -113,6 +114,10 @@ template<bool> struct CompileAssert {};
#define arraysize(array) (int)(sizeof(array)/sizeof((array)[0]))
+#ifndef NO_THREAD_SAFETY_ANALYSIS
+#define NO_THREAD_SAFETY_ANALYSIS
+#endif
+
class StringPiece;
string CEscape(const StringPiece& src);