diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:57:18 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:57:18 +0900 |
commit | 4dfe393027cfcfe844a8c8b97209a714904308c4 (patch) | |
tree | d94a05252de9becb345ed10d7c3cfed598005923 | |
parent | 485249b5a02cf59571cde61f83d10a6a9ec36b3d (diff) | |
download | re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.gz re2-4dfe393027cfcfe844a8c8b97209a714904308c4.tar.bz2 re2-4dfe393027cfcfe844a8c8b97209a714904308c4.zip |
Imported Upstream version 20160301upstream/20160301
Change-Id: I33831129b7d3a860e34c551b415ede10da350c73
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r-- | BUILD | 13 | ||||
-rw-r--r-- | CMakeLists.txt | 12 | ||||
-rw-r--r-- | Makefile | 16 | ||||
-rw-r--r-- | README | 5 | ||||
-rw-r--r-- | re2/bitstate.cc | 8 | ||||
-rw-r--r-- | re2/dfa.cc | 105 | ||||
-rw-r--r-- | re2/parse.cc | 49 | ||||
-rw-r--r-- | re2/prog.cc | 8 | ||||
-rw-r--r-- | re2/prog.h | 6 | ||||
-rw-r--r-- | re2/set.h | 2 | ||||
-rw-r--r-- | re2/testing/backtrack.cc | 2 | ||||
-rw-r--r-- | re2/testing/exhaustive1_test.cc | 7 | ||||
-rw-r--r-- | re2/testing/re2_test.cc | 12 | ||||
-rw-r--r-- | re2/testing/regexp_benchmark.cc | 33 | ||||
-rw-r--r-- | re2/testing/tester.cc | 25 | ||||
-rw-r--r-- | re2/testing/tester.h | 2 | ||||
-rw-r--r-- | util/atomicops.h | 179 | ||||
-rw-r--r-- | util/util.h | 5 |
18 files changed, 178 insertions, 311 deletions
@@ -36,7 +36,6 @@ cc_library( "re2/unicode_groups.cc", "re2/unicode_groups.h", "re2/walker-inl.h", - "util/atomicops.h", "util/flags.h", "util/hash.cc", "util/logging.cc", @@ -103,8 +102,8 @@ re2_test("filtered_re2_test") re2_test("mimics_pcre_test") re2_test("parse_test") re2_test("possible_match_test") -re2_test("re2_test") re2_test("re2_arg_test") +re2_test("re2_test") re2_test("regexp_test") re2_test("required_prefix_test") re2_test("search_test") @@ -112,11 +111,11 @@ re2_test("set_test") re2_test("simplify_test") re2_test("string_generator_test") -re2_test("dfa_test") -re2_test("exhaustive1_test") -re2_test("exhaustive2_test") -re2_test("exhaustive3_test") -re2_test("exhaustive_test") +re2_test("dfa_test", size="large") +re2_test("exhaustive1_test", size="large") +re2_test("exhaustive2_test", size="large") +re2_test("exhaustive3_test", size="large") +re2_test("exhaustive_test", size="large") re2_test("random_test", size="large") # TODO: Add support for regexp_benchmark. diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c980df..3ed718f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,11 +11,19 @@ option(USEPCRE "use PCRE in tests and benchmarks" OFF) set(EXTRA_TARGET_LINK_LIBRARIES) +if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + if(MSVC_VERSION LESS 1800) + message(FATAL_ERROR "you need Visual Studio 2013 or later") + endif() +elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-std=c++11) +endif() + if(WIN32) add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX) set(THREADING threadwin) -else() - add_definitions(-pthread) +elseif(UNIX) + add_compile_options(-pthread) set(THREADING thread) list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread) endif() @@ -2,15 +2,20 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -# to build against PCRE for testing or benchmarking, -# uncomment the next two lines +# To build against ICU for full Unicode properties support, +# uncomment the next two lines: +# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU +# LDICU=$(shell pkg-config icu-uc --libs) + +# To build against PCRE for testing or benchmarking, +# uncomment the next two lines: # CCPCRE=-I/usr/local/include -DUSEPCRE # LDPCRE=-L/usr/local/lib -lpcre CXX?=g++ -CXXFLAGS?=-O3 -g -pthread # can override -RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCPCRE) # required -LDFLAGS?=-pthread +CXXFLAGS?=-std=c++11 -O3 -g -pthread # can override +RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE) # required +LDFLAGS?=-pthread $(LDICU) AR?=ar ARFLAGS?=rsc NM?=nm @@ -62,7 +67,6 @@ INSTALL_HFILES=\ re2/variadic_function.h\ HFILES=\ - util/atomicops.h\ util/benchmark.h\ util/flags.h\ util/logging.h\ @@ -10,6 +10,9 @@ make test make install make testinstall +There is a fair amount of documentation (including code snippets) in +the re2.h header file. + More information can be found on the wiki: https://github.com/google/re2/wiki @@ -23,6 +26,8 @@ Unless otherwise noted, the RE2 source files are distributed under the BSD-style license found in the LICENSE file. RE2's native language is C++. + +A C wrapper is at https://github.com/marcomaggi/cre2/. An Erlang wrapper is at https://github.com/tuncer/re2/. An Inferno wrapper is at https://github.com/powerman/inferno-re2/. A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM. diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 5740daa..0cfa480 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -169,13 +169,13 @@ bool BitState::TrySearch(int id0, const char* p0) { // << (p - text_.begin()) << " " << arg; Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { - case kInstFail: - return false; - default: LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg; return false; + case kInstFail: + continue; + case kInstAlt: // Cannot just // Push(ip->out1(), p, 0); @@ -196,7 +196,7 @@ bool BitState::TrySearch(int id0, const char* p0) { id = ip->out1(); goto CheckAndLoop; } - LOG(DFATAL) << "Bad arg in kInstCapture: " << arg; + LOG(DFATAL) << "Bad arg in kInstAlt: " << arg; continue; case kInstAltMatch: @@ -21,7 +21,6 @@ // // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent. -#include "util/atomicops.h" #include "util/flags.h" #include "util/sparse_set.h" #include "re2/prog.h" @@ -102,7 +101,7 @@ class DFA { uint flag_; // Empty string bitfield flags in effect on the way // into this state, along with kFlagMatch if this // is a matching state. - State** next_; // Outgoing arrows from State, + std::atomic<State*> next_[1]; // Outgoing arrows from State, // one per input byte class }; @@ -283,7 +282,7 @@ class DFA { struct StartInfo { StartInfo() : start(NULL), firstbyte(kFbUnknown) { } State* start; - volatile int firstbyte; + std::atomic<int> firstbyte; }; // Fills in params->start and params->firstbyte using @@ -463,8 +462,9 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) // At minimum, the search requires room for two states in order // to limp along, restarting frequently. We'll get better performance // if there is room for a larger number of states, say 20. - int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) + - (prog_->bytemap_range()+1)*sizeof(State*); + int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot + int64 one_state = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) + + (prog_->size()+nmark)*sizeof(int); if (state_budget_ < 20*one_state) { LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld", prog_->size(), max_mem); @@ -734,7 +734,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) { mutex_.AssertHeld(); // Look in the cache for a pre-existing state. - State state = { inst, ninst, flag, NULL }; + State state = {inst, ninst, flag}; StateSet::iterator it = state_cache_.find(&state); if (it != state_cache_.end()) { if (DebugDFA) @@ -748,19 +748,23 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) { // State*, empirically. const int kStateCacheOverhead = 32; int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot - int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int); + int mem = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) + + ninst*sizeof(int); if (mem_budget_ < mem + kStateCacheOverhead) { mem_budget_ = -1; return NULL; } mem_budget_ -= mem + kStateCacheOverhead; - // Allocate new state, along with room for next and inst. + // Allocate new state along with room for next_ and inst_. char* space = new char[mem]; - State* s = reinterpret_cast<State*>(space); - s->next_ = reinterpret_cast<State**>(s + 1); - s->inst_ = reinterpret_cast<int*>(s->next_ + nnext); - memset(s->next_, 0, nnext*sizeof s->next_[0]); + State* s = new (space) State; + (void) new (s->next_) std::atomic<State*>[nnext]; + // Work around a unfortunate bug in older versions of libstdc++. + // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658) + for (int i = 0; i < nnext; i++) + (void) new (s->next_ + i) std::atomic<State*>(NULL); + s->inst_ = new (s->next_ + nnext) int[ninst]; memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); s->ninst_ = ninst; s->flag_ = flag; @@ -984,8 +988,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { } // If someone else already computed this, return it. - State* ns; - ATOMIC_LOAD_CONSUME(ns, &state->next_[ByteMap(c)]); + State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed); if (ns != NULL) return ns; @@ -1055,7 +1058,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // Write barrier before updating state->next_ so that the // main search loop can proceed without any locking, for speed. // (Otherwise it would need one mutex operation per input byte.) - ATOMIC_STORE_RELEASE(&state->next_[ByteMap(c)], ns); + state->next_[ByteMap(c)].store(ns, std::memory_order_release); return ns; } @@ -1155,7 +1158,7 @@ void DFA::ResetCache(RWLocker* cache_lock) { // Clear the cache, reset the memory budget. for (int i = 0; i < kMaxStart; i++) { start_[i].start = NULL; - start_[i].firstbyte = kFbUnknown; + start_[i].firstbyte.store(kFbUnknown, std::memory_order_relaxed); } ClearCache(); mem_budget_ = state_budget_; @@ -1376,8 +1379,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, // Okay to use bytemap[] not ByteMap() here, because // c is known to be an actual byte and not kByteEndText. - State* ns; - ATOMIC_LOAD_CONSUME(ns, &s->next_[bytemap[c]]); + State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire); if (ns == NULL) { ns = RunStateOnByteUnlocked(s, c); if (ns == NULL) { @@ -1464,8 +1466,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, lastbyte = params->text.begin()[-1] & 0xFF; } - State* ns; - ATOMIC_LOAD_CONSUME(ns, &s->next_[ByteMap(lastbyte)]); + State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire); if (ns == NULL) { ns = RunStateOnByteUnlocked(s, lastbyte); if (ns == NULL) { @@ -1653,16 +1654,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) { } } - if (DebugDFA) { - int fb; - ATOMIC_LOAD_RELAXED(fb, &info->firstbyte); + if (DebugDFA) fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n", params->anchored, params->run_forward, flags, - DumpState(info->start).c_str(), fb); - } + DumpState(info->start).c_str(), info->firstbyte.load()); params->start = info->start; - ATOMIC_LOAD_ACQUIRE(params->firstbyte, &info->firstbyte); + params->firstbyte = info->firstbyte.load(std::memory_order_acquire); return true; } @@ -1671,13 +1669,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) { bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint flags) { // Quick check. - int fb; - ATOMIC_LOAD_ACQUIRE(fb, &info->firstbyte); + int fb = info->firstbyte.load(std::memory_order_acquire); if (fb != kFbUnknown) return true; MutexLock l(&mutex_); - if (info->firstbyte != kFbUnknown) + fb = info->firstbyte.load(std::memory_order_relaxed); + if (fb != kFbUnknown) return true; q0_->clear(); @@ -1690,13 +1688,13 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, if (info->start == DeadState) { // Synchronize with "quick check" above. - ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone); + info->firstbyte.store(kFbNone, std::memory_order_release); return true; } if (info->start == FullMatchState) { // Synchronize with "quick check" above. - ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone); // will be ignored + info->firstbyte.store(kFbNone, std::memory_order_release); // will be ignored return true; } @@ -1708,7 +1706,7 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, State* s = RunStateOnByte(info->start, i); if (s == NULL) { // Synchronize with "quick check" above. - ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte); + info->firstbyte.store(firstbyte, std::memory_order_release); return false; } if (s == info->start) @@ -1721,8 +1719,9 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, break; } } + // Synchronize with "quick check" above. - ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte); + info->firstbyte.store(firstbyte, std::memory_order_release); return true; } @@ -1784,17 +1783,21 @@ bool DFA::Search(const StringPiece& text, // // This is a separate function so that // prog.h can be used without moving the definition of -// class DFA out of this file. If you set -// prog->dfa_ = dfa; +// class DFA out of this file. If you (atomically) set +// prog->dfa_first_ = dfa; +// or +// prog->dfa_longest_ = dfa; // then you also have to set // prog->delete_dfa_ = DeleteDFA; -// so that ~Prog can delete the dfa. -static void DeleteDFA(DFA* dfa) { - delete dfa; +// so that ~Prog can delete the DFA. +static void DeleteDFA(std::atomic<DFA*>* pdfa) { + DFA* dfa = pdfa->load(std::memory_order_relaxed); + if (dfa != NULL) + delete dfa; } DFA* Prog::GetDFA(MatchKind kind) { - DFA*volatile* pdfa; + std::atomic<DFA*>* pdfa; if (kind == kFirstMatch || kind == kManyMatch) { pdfa = &dfa_first_; } else { @@ -1803,13 +1806,12 @@ DFA* Prog::GetDFA(MatchKind kind) { } // Quick check. - DFA *dfa; - ATOMIC_LOAD_ACQUIRE(dfa, pdfa); + DFA* dfa = pdfa->load(std::memory_order_acquire); if (dfa != NULL) return dfa; MutexLock l(&dfa_mutex_); - dfa = *pdfa; + dfa = pdfa->load(std::memory_order_relaxed); if (dfa != NULL) return dfa; @@ -1828,8 +1830,7 @@ DFA* Prog::GetDFA(MatchKind kind) { delete_dfa_ = DeleteDFA; // Synchronize with "quick check" above. - ATOMIC_STORE_RELEASE(pdfa, dfa); - + pdfa->store(dfa, std::memory_order_release); return dfa; } @@ -2094,19 +2095,9 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { // PossibleMatchRange for a Prog. bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) { - DFA* dfa = NULL; - { - MutexLock l(&dfa_mutex_); - // Have to use dfa_longest_ to get all strings for full matches. - // For example, (a|aa) never matches aa in first-match mode. - dfa = dfa_longest_; - if (dfa == NULL) { - dfa = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); - ATOMIC_STORE_RELEASE(&dfa_longest_, dfa); - delete_dfa_ = DeleteDFA; - } - } - return dfa->PossibleMatchRange(min, max, maxlen); + // Have to use dfa_longest_ to get all strings for full matches. + // For example, (a|aa) never matches aa in first-match mode. + return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen); } } // namespace re2 diff --git a/re2/parse.cc b/re2/parse.cc index f51e589..f55dc8c 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -23,6 +23,12 @@ #include "re2/unicode_groups.h" #include "re2/walker-inl.h" +#if defined(RE2_USE_ICU) +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" +#endif + namespace re2 { // Regular expression parse state. @@ -1488,11 +1494,6 @@ static const UGroup* LookupGroup(const StringPiece& name, return NULL; } -// Fake UGroup containing all Runes -static URange16 any16[] = { { 0, 65535 } }; -static URange32 any32[] = { { 65536, Runemax } }; -static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; - // Look for a POSIX group with the given name (e.g., "[:^alpha:]") static const UGroup* LookupPosixGroup(const StringPiece& name) { return LookupGroup(name, posix_groups, num_posix_groups); @@ -1502,6 +1503,12 @@ static const UGroup* LookupPerlGroup(const StringPiece& name) { return LookupGroup(name, perl_groups, num_perl_groups); } +#if !defined(RE2_USE_ICU) +// Fake UGroup containing all Runes +static URange16 any16[] = { { 0, 65535 } }; +static URange32 any32[] = { { 65536, Runemax } }; +static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; + // Look for a Unicode group with the given name (e.g., "Han") static const UGroup* LookupUnicodeGroup(const StringPiece& name) { // Special case: "Any" means any. @@ -1509,6 +1516,7 @@ static const UGroup* LookupUnicodeGroup(const StringPiece& name) { return &anygroup; return LookupGroup(name, unicode_groups, num_unicode_groups); } +#endif // Add a UGroup or its negation to the character class. static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, @@ -1600,7 +1608,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, // Committed to parse. Results: int sign = +1; // -1 = negated char class if (c == 'P') - sign = -1; + sign = -sign; StringPiece seq = *s; // \p{Han} or \pL StringPiece name; // Han or L s->remove_prefix(2); // '\\', 'p' @@ -1630,11 +1638,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, // Chop seq where s now begins. seq = StringPiece(seq.begin(), static_cast<int>(s->begin() - seq.begin())); - // Look up group if (name.size() > 0 && name[0] == '^') { sign = -sign; name.remove_prefix(1); // '^' } + +#if !defined(RE2_USE_ICU) + // Look up the group in the RE2 Unicode data. const UGroup *g = LookupUnicodeGroup(name); if (g == NULL) { status->set_code(kRegexpBadCharRange); @@ -1643,6 +1653,31 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, } AddUGroup(cc, g, sign, parse_flags); +#else + // Look up the group in the ICU Unicode data. Because ICU provides full + // Unicode properties support, this could be more than a lookup by name. + ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8( + string("\\p{") + name.ToString() + string("}")); + UErrorCode uerr = U_ZERO_ERROR; + ::icu::UnicodeSet uset(ustr, uerr); + if (U_FAILURE(uerr)) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(seq); + return kParseError; + } + + // Convert the UnicodeSet to a URange32 and UGroup that we can add. + int nr = uset.getRangeCount(); + URange32* r = new URange32[nr]; + for (int i = 0; i < nr; i++) { + r[i].lo = uset.getRangeStart(i); + r[i].hi = uset.getRangeEnd(i); + } + UGroup g = {"", +1, 0, 0, r, nr}; + AddUGroup(cc, &g, sign, parse_flags); + delete[] r; +#endif + return kParseOk; } diff --git a/re2/prog.cc b/re2/prog.cc index 499f560..cb7e6e0 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -113,11 +113,9 @@ Prog::Prog() } Prog::~Prog() { - if (delete_dfa_) { - if (dfa_first_) - delete_dfa_(dfa_first_); - if (dfa_longest_) - delete_dfa_(dfa_longest_); + if (delete_dfa_ != NULL) { + delete_dfa_(&dfa_first_); + delete_dfa_(&dfa_longest_); } delete[] onepass_nodes_; delete[] inst_; @@ -360,10 +360,10 @@ class Prog { Inst* inst_; // pointer to instruction array Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_ - DFA* volatile dfa_first_; // DFA cached for kFirstMatch - DFA* volatile dfa_longest_; // DFA cached for kLongestMatch and kFullMatch + std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch + std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch and kFullMatch int64 dfa_mem_; // Maximum memory for DFAs. - void (*delete_dfa_)(DFA* dfa); + void (*delete_dfa_)(std::atomic<DFA*>* pdfa); Bitmap<256> byterange_; // byterange.Get(x) true if x ends a // commonly-treated byte range. @@ -31,7 +31,7 @@ class RE2::Set { // Compile prepares the Set for matching. // Add must not be called again after Compile. - // Compile must be called before FullMatch or PartialMatch. + // Compile must be called before Match. // Compile may return false if it runs out of memory. bool Compile(); diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc index a872840..2141c53 100644 --- a/re2/testing/backtrack.cc +++ b/re2/testing/backtrack.cc @@ -4,7 +4,7 @@ // Tested by search_test.cc, exhaustive_test.cc, tester.cc // -// Prog::BadSearchBacktrack is a backtracking regular expression search, +// Prog::UnsafeSearchBacktrack is a backtracking regular expression search, // except that it remembers where it has been, trading a lot of // memory for a lot of time. It exists only for testing purposes. // diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc index 9e057cc..c06a10c 100644 --- a/re2/testing/exhaustive1_test.cc +++ b/re2/testing/exhaustive1_test.cc @@ -33,10 +33,9 @@ TEST(Repetition, Capturing) { 7, Explode("ab"), "(?:%s)", ""); // This would be a great test, but it runs forever when PCRE is enabled. - if (strstr("PCRE", FLAGS_regexp_engines.c_str()) == NULL) - ExhaustiveTest(4, 3, Split(" ", "a (a)"), ops, - 100, Explode("a"), "(?:%s)", ""); + if (!StringPiece(FLAGS_regexp_engines).contains("PCRE")) + ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops, + 50, Explode("a"), "(?:%s)", ""); } } // namespace re2 - diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc index e201c1e..5a32d92 100644 --- a/re2/testing/re2_test.cc +++ b/re2/testing/re2_test.cc @@ -6,7 +6,7 @@ // TODO: Test extractions for PartialMatch/Consume #include <errno.h> -#ifndef _MSC_VER +#if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> /* for sysconf */ #include <sys/mman.h> #endif @@ -17,8 +17,6 @@ #include "re2/re2.h" #include "re2/regexp.h" -DECLARE_bool(logtostderr); - namespace re2 { TEST(RE2, HexTests) { @@ -1540,7 +1538,7 @@ TEST(RE2, Bug18458852) { } TEST(RE2, Bug18523943) { - // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL). + // Bug in BitState: case kFailInst failed the match entirely. RE2::Options opt; const char a[] = { @@ -1559,7 +1557,7 @@ TEST(RE2, Bug18523943) { RE2 re((const char*)b, opt); CHECK(re.ok()); string s1; - CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); + CHECK(RE2::PartialMatch((const char*)a, re, &s1)); } TEST(RE2, Bug21371806) { @@ -1585,10 +1583,10 @@ TEST(RE2, Bug26356109) { string s = "abc"; StringPiece m; - CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1)); + CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::UNANCHORED, &m, 1)); CHECK_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'"; - CHECK(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1)); + CHECK(re.Match(s, 0, static_cast<int>(s.size()), RE2::ANCHOR_BOTH, &m, 1)); CHECK_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'"; } diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc index 141bad1..8d80b4c 100644 --- a/re2/testing/regexp_benchmark.cc +++ b/re2/testing/regexp_benchmark.cc @@ -299,9 +299,9 @@ void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search // Unambiguous search (RE2 can use OnePass). void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); } -void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); } void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); } void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); } +void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); } BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE @@ -311,9 +311,9 @@ BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); } -void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); } void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); } void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); } +void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); } BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE @@ -323,28 +323,31 @@ BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); // Ambiguous search (RE2 cannot use OnePass). +// Used to be ".*.$", but that is coalesced to ".+$" these days. -void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchDFA); } -void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchPCRE); } -void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchRE2); } -void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", SearchBitState); } +void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchDFA); } +void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchPCRE); } +void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchRE2); } +void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchBitState); } -BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE -BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); #endif -BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); -void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedDFA); } -void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); } -void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedRE2); } +void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); } +void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); } +void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); } +void Search_Success1_Cached_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); } -BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE -BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); #endif -BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_Cached_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); // Benchmark: use regexp to find phone number. diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc index cb12bad..62cbb6c 100644 --- a/re2/testing/tester.cc +++ b/re2/testing/tester.cc @@ -26,7 +26,7 @@ enum { kMaxSubmatch = 1+16, // $0...$16 }; -const char* engine_types[kEngineMax] = { +const char* engine_names[kEngineMax] = { "Backtrack", "NFA", "DFA", @@ -39,18 +39,18 @@ const char* engine_types[kEngineMax] = { "PCRE", }; -// Returns the name string for the type t. -static string EngineString(Engine t) { - if (t < 0 || t >= arraysize(engine_types) || engine_types[t] == NULL) { - return StringPrintf("type%d", static_cast<int>(t)); - } - return engine_types[t]; +// Returns the name of the engine. +static StringPiece EngineName(Engine e) { + CHECK_GE(e, 0); + CHECK_LT(e, arraysize(engine_names)); + CHECK(engine_names[e] != NULL); + return engine_names[e]; } // Returns bit mask of engines to use. static uint32 Engines() { - static uint32 cached_engines; - static bool did_parse; + static bool did_parse = false; + static uint32 cached_engines = 0; if (did_parse) return cached_engines; @@ -59,7 +59,7 @@ static uint32 Engines() { cached_engines = ~0; } else { for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) - if (strstr(EngineString(i).c_str(), FLAGS_regexp_engines.c_str())) + if (StringPiece(FLAGS_regexp_engines).contains(EngineName(i))) cached_engines |= 1<<i; } @@ -69,8 +69,9 @@ static uint32 Engines() { cached_engines &= ~(1<<kEnginePCRE); for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) { if (cached_engines & (1<<i)) - LOG(INFO) << EngineString(i) << " enabled"; + LOG(INFO) << EngineName(i) << " enabled"; } + did_parse = true; return cached_engines; } @@ -566,7 +567,7 @@ void TestInstance::LogMatch(const char* prefix, Engine e, const StringPiece& text, const StringPiece& context, Prog::Anchor anchor) { LOG(INFO) << prefix - << EngineString(e) + << EngineName(e) << " regexp " << CEscape(regexp_str_) << " " diff --git a/re2/testing/tester.h b/re2/testing/tester.h index d1e1b22..e9fae85 100644 --- a/re2/testing/tester.h +++ b/re2/testing/tester.h @@ -20,7 +20,7 @@ class Regexp; // All the supported regexp engines. enum Engine { - kEngineBacktrack = 0, // Prog::BadSearchBacktrack + kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack kEngineNFA, // Prog::SearchNFA kEngineDFA, // Prog::SearchDFA, only ask whether it matched kEngineDFA1, // Prog::SearchDFA, ask for match[0] diff --git a/util/atomicops.h b/util/atomicops.h deleted file mode 100644 index dc944e7..0000000 --- a/util/atomicops.h +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2006-2008 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef RE2_UTIL_ATOMICOPS_H__ -#define RE2_UTIL_ATOMICOPS_H__ - -// The memory ordering constraints resemble the ones in C11. -// RELAXED - no memory ordering, just an atomic operation. -// CONSUME - data-dependent ordering. -// ACQUIRE - prevents memory accesses from hoisting above the operation. -// RELEASE - prevents memory accesses from sinking below the operation. - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -#if !defined(OS_NACL) && (__has_builtin(__atomic_load_n) || (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__ >= 40801)) - -#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_RELAXED); } while (0) -#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_CONSUME); } while (0) -#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_ACQUIRE); } while (0) -#define ATOMIC_STORE_RELAXED(p, v) __atomic_store_n((p), (v), __ATOMIC_RELAXED) -#define ATOMIC_STORE_RELEASE(p, v) __atomic_store_n((p), (v), __ATOMIC_RELEASE) - -#else // old compiler - -#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = *(p); } while (0) -#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = *(p); MaybeReadMemoryBarrier(); } while (0) -#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = *(p); ReadMemoryBarrier(); } while (0) -#define ATOMIC_STORE_RELAXED(p, v) do { *(p) = (v); } while (0) -#define ATOMIC_STORE_RELEASE(p, v) do { WriteMemoryBarrier(); *(p) = (v); } while (0) - -// WriteMemoryBarrier(), ReadMemoryBarrier() and MaybeReadMemoryBarrier() -// are an implementation detail and must not be used in the rest of the code. - -#if defined(__i386__) - -static inline void WriteMemoryBarrier() { - int x; - __asm__ __volatile__("xchgl (%0),%0" // The lock prefix is implicit for xchg. - :: "r" (&x)); -} - -#elif defined(__x86_64__) - -// 64-bit implementations of memory barrier can be simpler, because -// "sfence" is guaranteed to exist. -static inline void WriteMemoryBarrier() { - __asm__ __volatile__("sfence" : : : "memory"); -} - -#elif defined(__ppc__) || defined(__powerpc64__) - -static inline void WriteMemoryBarrier() { - __asm__ __volatile__("lwsync" : : : "memory"); -} - -#elif defined(__aarch64__) - -static inline void WriteMemoryBarrier() { - __asm__ __volatile__("dmb st" : : : "memory"); -} - -#elif defined(__alpha__) - -static inline void WriteMemoryBarrier() { - __asm__ __volatile__("wmb" : : : "memory"); -} - -#elif defined(__arm__) && defined(__linux__) - -// Linux on ARM puts a suitable memory barrier at a magic address for us to call. -static inline void WriteMemoryBarrier() { - ((void(*)(void))0xffff0fa0)(); -} - -#elif defined(__windows__) || defined(_WIN32) - -#include <intrin.h> -#include <windows.h> - -static inline void WriteMemoryBarrier() { -#if defined(_M_IX86) || defined(_M_X64) - // x86 and x64 CPUs have a strong memory model that prohibits most types of - // reordering, so a non-instruction intrinsic to suppress compiler reordering - // is sufficient. _WriteBarrier is deprecated, but is still appropriate for - // the "old compiler" path (pre C++11). - _WriteBarrier(); -#else - LONG x; - ::InterlockedExchange(&x, 0); -#endif -} - -#elif defined(OS_NACL) - -static inline void WriteMemoryBarrier() { - __sync_synchronize(); -} - -#elif defined(__mips__) - -static inline void WriteMemoryBarrier() { - __asm__ __volatile__("sync" : : : "memory"); -} - -#else - -#include "util/mutex.h" - -static inline void WriteMemoryBarrier() { - // Slight overkill, but good enough: - // any mutex implementation must have - // a read barrier after the lock operation and - // a write barrier before the unlock operation. - // - // It may be worthwhile to write architecture-specific - // barriers for the common platforms, as above, but - // this is a correct fallback. - re2::Mutex mu; - re2::MutexLock l(&mu); -} - -#endif - -// Alpha has very weak memory ordering. If relying on WriteBarriers, one must -// use read barriers for the readers too. -#if defined(__alpha__) - -static inline void MaybeReadMemoryBarrier() { - __asm__ __volatile__("mb" : : : "memory"); -} - -#else - -static inline void MaybeReadMemoryBarrier() {} - -#endif // __alpha__ - -// Read barrier for various targets. - -#if defined(__ppc__) || defined(__powerpc64__) - -static inline void ReadMemoryBarrier() { - __asm__ __volatile__("lwsync" : : : "memory"); -} - -#elif defined(__aarch64__) - -static inline void ReadMemoryBarrier() { - __asm__ __volatile__("dmb ld" : : : "memory"); -} - -#elif defined(__alpha__) - -static inline void ReadMemoryBarrier() { - __asm__ __volatile__("mb" : : : "memory"); -} - -#elif defined(__mips__) - -static inline void ReadMemoryBarrier() { - __asm__ __volatile__("sync" : : : "memory"); -} - -#else - -static inline void ReadMemoryBarrier() {} - -#endif - -#endif // old compiler - -#ifndef NO_THREAD_SAFETY_ANALYSIS -#define NO_THREAD_SAFETY_ANALYSIS -#endif - -#endif // RE2_UTIL_ATOMICOPS_H__ diff --git a/util/util.h b/util/util.h index c59d91f..8c5d922 100644 --- a/util/util.h +++ b/util/util.h @@ -30,6 +30,7 @@ #include <ostream> #include <utility> #include <set> +#include <atomic> // Use std names. using std::set; @@ -113,6 +114,10 @@ template<bool> struct CompileAssert {}; #define arraysize(array) (int)(sizeof(array)/sizeof((array)[0])) +#ifndef NO_THREAD_SAFETY_ANALYSIS +#define NO_THREAD_SAFETY_ANALYSIS +#endif + class StringPiece; string CEscape(const StringPiece& src); |