diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:59:50 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:59:50 +0900 |
commit | a96ee296357f1a382ea43fe4ca4f633d49e922cf (patch) | |
tree | fef7122e14840a504d4461423e3ca6512741f152 | |
parent | 9408cea9ad30ce73c37268ab8e86ef2cededfadf (diff) | |
download | re2-a96ee296357f1a382ea43fe4ca4f633d49e922cf.tar.gz re2-a96ee296357f1a382ea43fe4ca4f633d49e922cf.tar.bz2 re2-a96ee296357f1a382ea43fe4ca4f633d49e922cf.zip |
Imported Upstream version 20161001upstream/20161001
Change-Id: I4e7b71f50654c54bfd112ed1cb774c9c6bb732ea
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r-- | BUILD | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rwxr-xr-x | kokoro/windows/continuous-cmake.bat | 14 | ||||
-rw-r--r-- | libre2.symbols | 2 | ||||
-rw-r--r-- | libre2.symbols.darwin | 2 | ||||
-rw-r--r-- | re2/bitstate.cc | 5 | ||||
-rw-r--r-- | re2/compile.cc | 6 | ||||
-rw-r--r-- | re2/dfa.cc | 144 | ||||
-rw-r--r-- | re2/filtered_re2.cc | 16 | ||||
-rw-r--r-- | re2/filtered_re2.h | 1 | ||||
-rw-r--r-- | re2/fuzzing/re2_fuzzer.cc | 5 | ||||
-rw-r--r-- | re2/nfa.cc | 35 | ||||
-rw-r--r-- | re2/onepass.cc | 18 | ||||
-rw-r--r-- | re2/prefilter.cc | 29 | ||||
-rw-r--r-- | re2/prefilter.h | 2 | ||||
-rw-r--r-- | re2/prefilter_tree.cc | 136 | ||||
-rw-r--r-- | re2/prefilter_tree.h | 7 | ||||
-rw-r--r-- | re2/prog.cc | 18 | ||||
-rw-r--r-- | re2/prog.h | 17 | ||||
-rw-r--r-- | re2/re2.cc | 125 | ||||
-rw-r--r-- | re2/re2.h | 2 | ||||
-rw-r--r-- | re2/regexp.h | 3 | ||||
-rw-r--r-- | re2/set.cc | 14 | ||||
-rw-r--r-- | re2/stringpiece.cc | 3 | ||||
-rw-r--r-- | re2/testing/dfa_test.cc | 71 | ||||
-rw-r--r-- | re2/testing/filtered_re2_test.cc | 34 | ||||
-rwxr-xr-x | runtests | 2 | ||||
-rw-r--r-- | util/benchmark.cc | 3 | ||||
-rw-r--r-- | util/logging.cc | 9 | ||||
-rw-r--r-- | util/logging.h | 27 | ||||
-rw-r--r-- | util/mix.h | 11 | ||||
-rw-r--r-- | util/pcre.cc | 19 | ||||
-rw-r--r-- | util/sparse_array.h | 48 | ||||
-rw-r--r-- | util/sparse_set.h | 24 |
35 files changed, 375 insertions, 484 deletions
@@ -40,7 +40,6 @@ cc_library( "re2/unicode_groups.h", "re2/walker-inl.h", "util/flags.h", - "util/logging.cc", "util/logging.h", "util/mix.h", "util/mutex.h", diff --git a/CMakeLists.txt b/CMakeLists.txt index bf70d59..3c90f5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") message(FATAL_ERROR "you need Visual Studio 2013 or later") endif() if(BUILD_SHARED_LIBS) - message(FATAL_ERROR "building a DLL is not currently supported") + # See http://www.kitware.com/blog/home/post/939 for details. + cmake_minimum_required(VERSION 3.4) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") add_compile_options(-std=c++11) @@ -60,7 +62,6 @@ set(RE2_SOURCES re2/tostring.cc re2/unicode_casefold.cc re2/unicode_groups.cc - util/logging.cc util/rune.cc util/strutil.cc ) @@ -107,7 +107,6 @@ HFILES=\ re2/walker-inl.h\ OFILES=\ - obj/util/logging.o\ obj/util/rune.o\ obj/util/strutil.o\ obj/re2/bitstate.o\ diff --git a/kokoro/windows/continuous-cmake.bat b/kokoro/windows/continuous-cmake.bat index 8f0f293..b6d2f6f 100755 --- a/kokoro/windows/continuous-cmake.bat +++ b/kokoro/windows/continuous-cmake.bat @@ -1,11 +1,11 @@ -CD git/re2 || EXIT /B 1 +CD git/re2 || EXIT /B 1 -cmake -D CMAKE_BUILD_TYPE=Debug -A x64 . || EXIT /B 1 -cmake --build . --config Debug --clean-first || EXIT /B 1 -ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1 +cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1 +cmake --build . --config Debug --clean-first || EXIT /B 1 +ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1 -cmake -D CMAKE_BUILD_TYPE=Release -A x64 . || EXIT /B 1 -cmake --build . --config Release --clean-first || EXIT /B 1 -ctest -C Release --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1 +cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1 +cmake --build . --config Release --clean-first || EXIT /B 1 +ctest -C Release --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1 EXIT /B 0 diff --git a/libre2.symbols b/libre2.symbols index 90a1020..1224baa 100644 --- a/libre2.symbols +++ b/libre2.symbols @@ -11,8 +11,6 @@ # re2::FilteredRE2* _ZN3re211FilteredRE2*; _ZNK3re211FilteredRE2*; - # flags - _ZN3re2*FLAGS_*; local: *; }; diff --git a/libre2.symbols.darwin b/libre2.symbols.darwin index 4207f87..a4d942f 100644 --- a/libre2.symbols.darwin +++ b/libre2.symbols.darwin @@ -15,5 +15,3 @@ __Zls*RKN3re211StringPieceE # re2::FilteredRE2* __ZN3re211FilteredRE2* __ZNK3re211FilteredRE2* -# flags -__ZN3re2*FLAGS_* diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 4552d17..4b92fee 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -108,7 +108,6 @@ bool BitState::ShouldVisit(int id, const char* p) { // Grow the stack. bool BitState::GrowStack() { - // VLOG(0) << "Reallocate."; maxjob_ *= 2; Job* newjob = new Job[maxjob_]; memmove(newjob, job_, njob_*sizeof job_[0]); @@ -179,8 +178,6 @@ bool BitState::TrySearch(int id0, const char* p0) { } // Visit ip, p. - // VLOG(0) << "Job: " << ip->id() << " " - // << (p - text_.begin()) << " " << arg; Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { default: @@ -274,7 +271,6 @@ bool BitState::TrySearch(int id0, const char* p0) { if (endmatch_ && p != text_.end()) goto Next; - // VLOG(0) << "Found match."; // We found a match. If the caller doesn't care // where the match is, no point going further. if (nsubmatch_ == 0) @@ -334,7 +330,6 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits; visited_ = new uint32_t[nvisited_]; memset(visited_, 0, nvisited_*sizeof visited_[0]); - // VLOG(0) << "nvisited_ = " << nvisited_; ncap_ = 2*nsubmatch; if (ncap_ < 2) diff --git a/re2/compile.cc b/re2/compile.cc index a99c909..472bbfa 100644 --- a/re2/compile.cc +++ b/re2/compile.cc @@ -1266,11 +1266,11 @@ Prog* Compiler::CompileSet(const RE2::Options& options, RE2::Anchor anchor, // Make sure DFA has enough memory to operate, // since we're not going to fall back to the NFA. - bool failed; + bool dfa_failed = false; StringPiece sp = "hello, world"; prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch, - NULL, &failed, NULL); - if (failed) { + NULL, &dfa_failed, NULL); + if (dfa_failed) { delete prog; return NULL; } @@ -29,13 +29,13 @@ #include <algorithm> #include <atomic> #include <map> +#include <mutex> #include <new> #include <string> #include <unordered_set> #include <utility> #include <vector> -#include "util/flags.h" #include "util/logging.h" #include "util/mix.h" #include "util/mutex.h" @@ -44,10 +44,6 @@ #include "re2/prog.h" #include "re2/stringpiece.h" -DEFINE_bool(re2_dfa_bail_when_slow, true, - "Whether the RE2 DFA should bail out early " - "if the NFA would be faster (for testing)."); - // Silence "zero-sized array in struct/union" warning for DFA::State::next_. #ifdef _MSC_VER #pragma warning(disable: 4200) @@ -66,9 +62,12 @@ static void* memrchr(const void* s, int c, size_t n) { } #endif +// Controls whether the DFA should bail out early if the NFA would be faster. +static bool dfa_should_bail_when_slow = true; + // Changing this to true compiles in prints that trace execution of the DFA. // Generates a lot of output -- only useful for debugging. -static const bool DebugDFA = false; +static const bool ExtraDebug = false; // A DFA implementation of a regular expression program. // Since this is entirely a forward declaration mandated by C++, @@ -354,7 +353,6 @@ class DFA { int64_t state_budget_; // Amount of memory remaining for new States. StateSet state_cache_; // All States computed so far. StartInfo start_[kMaxStart]; - bool cache_warned_; // have printed to LOG(INFO) about the cache }; // Shorthand for casting to uint8_t*. @@ -432,9 +430,8 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) q0_(NULL), q1_(NULL), astack_(NULL), - mem_budget_(max_mem), - cache_warned_(false) { - if (DebugDFA) + mem_budget_(max_mem) { + if (ExtraDebug) fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str()); int nmark = 0; if (kind_ == Prog::kLongestMatch) @@ -451,8 +448,6 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) (sizeof(int)+sizeof(int)) * 2; // q0, q1 mem_budget_ -= nastack_ * sizeof(int); // astack if (mem_budget_ < 0) { - LOG(INFO) << "DFA out of memory: prog size " << prog_->size() - << " mem " << max_mem; init_failed_ = true; return; } @@ -469,8 +464,6 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) + (prog_->list_count()+nmark)*sizeof(int); if (state_budget_ < 20*one_state) { - LOG(INFO) << "DFA out of memory: prog size " << prog_->size() - << " mem " << max_mem; init_failed_ = true; return; } @@ -611,7 +604,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) { uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions bool sawmatch = false; // whether queue contains guaranteed kInstMatch bool sawmark = false; // whether queue contains a Mark - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); for (Workq::iterator it = q->begin(); it != q->end(); ++it) { int id = *it; @@ -637,7 +630,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) { (kind_ != Prog::kLongestMatch || !sawmark) && (flag & kFlagMatch)) { delete[] inst; - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, " -> FullMatchState\n"); return FullMatchState; } @@ -684,7 +677,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) { // if the state is *not* a matching state. if (n == 0 && flag == 0) { delete[] inst; - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, " -> DeadState\n"); return DeadState; } @@ -729,7 +722,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { state.flag_ = flag; StateSet::iterator it = state_cache_.find(&state); if (it != state_cache_.end()) { - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); return *it; } @@ -760,7 +753,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); s->ninst_ = ninst; s->flag_ = flag; - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, " -> %s\n", DumpState(s).c_str()); // Put state in cache and return it. @@ -952,7 +945,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, } } - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch); } @@ -1098,11 +1091,6 @@ class DFA::RWLocker { // Notice that the lock is *released* temporarily. void LockForWriting(); - // Returns whether the lock is already held for writing. - bool IsLockedForWriting() { - return writing_; - } - private: Mutex* mu_; bool writing_; @@ -1111,9 +1099,7 @@ class DFA::RWLocker { RWLocker& operator=(const RWLocker&) = delete; }; -DFA::RWLocker::RWLocker(Mutex* mu) - : mu_(mu), writing_(false) { - +DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) { mu_->ReaderLock(); } @@ -1128,10 +1114,10 @@ void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { } DFA::RWLocker::~RWLocker() { - if (writing_) - mu_->WriterUnlock(); - else + if (!writing_) mu_->ReaderUnlock(); + else + mu_->WriterUnlock(); } @@ -1148,20 +1134,8 @@ DFA::RWLocker::~RWLocker() { void DFA::ResetCache(RWLocker* cache_lock) { // Re-acquire the cache_mutex_ for writing (exclusive use). - bool was_writing = cache_lock->IsLockedForWriting(); cache_lock->LockForWriting(); - // If we already held cache_mutex_ for writing, it means - // this invocation of Search() has already reset the - // cache once already. That's a pretty clear indication - // that the cache is too small. Warn about that, once. - // TODO(rsc): Only warn if state_cache_.size() < some threshold. - if (was_writing && !cache_warned_) { - LOG(INFO) << "DFA memory cache could be too small: " - << "only room for " << state_cache_.size() << " states."; - cache_warned_ = true; - } - // Clear the cache, reset the memory budget. for (int i = 0; i < kMaxStart; i++) { start_[i].start = NULL; @@ -1343,7 +1317,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, } while (p != ep) { - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str()); if (have_firstbyte && s == start) { @@ -1401,7 +1375,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, // same at about 2 MB/s. Unless we're processing an average // of 10 bytes per state computation, fail so that RE2 can // fall back to the NFA. - if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL && + if (dfa_should_bail_when_slow && resetp != NULL && static_cast<size_t>(p - resetp) < 10*state_cache_.size()) { params->failed = true; return false; @@ -1449,7 +1423,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, lastmatch = p - 1; else lastmatch = p + 1; - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str()); @@ -1494,7 +1468,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, } } s = ns; - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "@_: %s\n", DumpState(s).c_str()); if (s == FullMatchState) { params->ep = reinterpret_cast<const char*>(ep); @@ -1517,7 +1491,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, } } } - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str()); } @@ -1615,7 +1589,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) { // Sanity check: make sure that text lies within context. if (text.begin() < context.begin() || text.end() > context.end()) { - LOG(DFATAL) << "Text is not inside context."; + LOG(DFATAL) << "context does not contain text"; params->start = DeadState; return true; } @@ -1668,7 +1642,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) { } } - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n", params->anchored, params->run_forward, flags, DumpState(info->start).c_str(), info->firstbyte.load()); @@ -1755,7 +1729,7 @@ bool DFA::Search(const StringPiece& text, } *failed = false; - if (DebugDFA) { + if (ExtraDebug) { fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", text.ToString().c_str(), anchored, want_earliest_match, @@ -1782,7 +1756,7 @@ bool DFA::Search(const StringPiece& text, *epp = text.end(); return true; } - if (DebugDFA) + if (ExtraDebug) fprintf(stderr, "start %s\n", DumpState(params.start).c_str()); bool ret = FastSearchLoop(¶ms); if (params.failed) { @@ -1794,24 +1768,6 @@ bool DFA::Search(const StringPiece& text, } DFA* Prog::GetDFA(MatchKind kind) { - std::atomic<DFA*>* pdfa; - if (kind == kFirstMatch || kind == kManyMatch) { - pdfa = &dfa_first_; - } else { - kind = kLongestMatch; - pdfa = &dfa_longest_; - } - - // Quick check. - DFA* dfa = pdfa->load(std::memory_order_acquire); - if (dfa != NULL) - return dfa; - - MutexLock l(&dfa_mutex_); - dfa = pdfa->load(std::memory_order_relaxed); - if (dfa != NULL) - return dfa; - // For a forward DFA, half the memory goes to each DFA. // However, if it is a "many match" DFA, then there is // no counterpart with which the memory must be shared. @@ -1819,25 +1775,29 @@ DFA* Prog::GetDFA(MatchKind kind) { // For a reverse DFA, all the memory goes to the // "longest match" DFA, because RE2 never does reverse // "first match" searches. - int64_t m = dfa_mem_; - if (reversed_) { - DCHECK_EQ(kind, kLongestMatch); - } else if (kind == kFirstMatch || kind == kLongestMatch) { - m /= 2; + if (kind == kFirstMatch) { + std::call_once(dfa_first_once_, [](Prog* prog) { + prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2); + }, this); + return dfa_first_; + } else if (kind == kManyMatch) { + std::call_once(dfa_first_once_, [](Prog* prog) { + prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_); + }, this); + return dfa_first_; } else { - DCHECK_EQ(kind, kManyMatch); + std::call_once(dfa_longest_once_, [](Prog* prog) { + if (!prog->reversed_) + prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2); + else + prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_); + }, this); + return dfa_longest_; } - dfa = new DFA(this, kind, m); - - // Synchronize with "quick check" above. - pdfa->store(dfa, std::memory_order_release); - return dfa; } -void Prog::DeleteDFA(std::atomic<DFA*>* pdfa) { - DFA* dfa = pdfa->load(std::memory_order_relaxed); - if (dfa != NULL) - delete dfa; +void Prog::DeleteDFA(DFA* dfa) { + delete dfa; } // Executes the regexp program to search in text, @@ -1955,6 +1915,10 @@ int Prog::BuildEntireDFA(MatchKind kind) { return GetDFA(kind)->BuildAllStates(); } +void Prog::TEST_dfa_should_bail_when_slow(bool b) { + dfa_should_bail_when_slow = b; +} + // Computes min and max for matching string. // Won't return strings bigger than maxlen. bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { @@ -2020,11 +1984,8 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { min->clear(); MutexLock lock(&mutex_); for (int i = 0; i < maxlen; i++) { - if (previously_visited_states[s] > kMaxEltRepetitions) { - VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions - << " for state s=" << s << " and min=" << CEscape(*min); + if (previously_visited_states[s] > kMaxEltRepetitions) break; - } previously_visited_states[s]++; // Stop if min is a match. @@ -2057,11 +2018,8 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { s = params.start; max->clear(); for (int i = 0; i < maxlen; i++) { - if (previously_visited_states[s] > kMaxEltRepetitions) { - VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions - << " for state s=" << s << " and max=" << CEscape(*max); + if (previously_visited_states[s] > kMaxEltRepetitions) break; - } previously_visited_states[s] += 1; // Try to extend the string with high bytes. diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index 8caf992..12f638a 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -19,6 +19,11 @@ FilteredRE2::FilteredRE2() prefilter_tree_(new PrefilterTree()) { } +FilteredRE2::FilteredRE2(int min_atom_len) + : compiled_(false), + prefilter_tree_(new PrefilterTree(min_atom_len)) { +} + FilteredRE2::~FilteredRE2() { for (size_t i = 0; i < re2_vec_.size(); i++) delete re2_vec_[i]; @@ -45,8 +50,13 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, } void FilteredRE2::Compile(std::vector<string>* atoms) { - if (compiled_ || re2_vec_.size() == 0) { - LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size(); + if (compiled_) { + LOG(ERROR) << "Compile called already."; + return; + } + + if (re2_vec_.empty()) { + LOG(ERROR) << "Compile called before Add."; return; } @@ -69,7 +79,7 @@ int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { int FilteredRE2::FirstMatch(const StringPiece& text, const std::vector<int>& atoms) const { if (!compiled_) { - LOG(DFATAL) << "FirstMatch called before Compile"; + LOG(DFATAL) << "FirstMatch called before Compile."; return -1; } std::vector<int> regexps; diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h index 48a6fd8..b1317cc 100644 --- a/re2/filtered_re2.h +++ b/re2/filtered_re2.h @@ -33,6 +33,7 @@ class PrefilterTree; class FilteredRE2 { public: FilteredRE2(); + explicit FilteredRE2(int min_atom_len); ~FilteredRE2(); // Uses RE2 constructor to create a RE2 object (re). Returns diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index 849e79d..6bf771f 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -8,9 +8,7 @@ #include <string> #include "re2/re2.h" -#include "util/logging.h" -using re2::FLAGS_minloglevel; using re2::StringPiece; using std::string; @@ -54,9 +52,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { if (size == 0 || size > 1000000) return 0; - // Suppress logging below FATAL severity. - FLAGS_minloglevel = 3; - // The one-at-a-time hash by Bob Jenkins. uint32_t hash = 0; for (size_t i = 0; i < size; i++) { @@ -40,6 +40,8 @@ namespace re2 { +static const bool ExtraDebug = false; + class NFA { public: NFA(Prog* prog); @@ -60,8 +62,6 @@ class NFA { bool anchored, bool longest, StringPiece* submatch, int nsubmatch); - static const int Debug = 0; - private: struct Thread { union { @@ -238,7 +238,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, if (id == 0) continue; if (q->has_index(id)) { - if (Debug) + if (ExtraDebug) fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str()); continue; } @@ -304,7 +304,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, // Save state; will pick up at next byte. t = Incref(t0); *tp = t; - if (Debug) + if (ExtraDebug) fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str()); Next: @@ -435,12 +435,6 @@ string NFA::FormatCapture(const char** capture) { return s; } -// Returns whether haystack contains needle's memory. -static bool StringPieceContains(const StringPiece haystack, const StringPiece needle) { - return haystack.begin() <= needle.begin() && - haystack.end() >= needle.end(); -} - bool NFA::Search(const StringPiece& text, const StringPiece& const_context, bool anchored, bool longest, StringPiece* submatch, int nsubmatch) { @@ -451,12 +445,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (context.begin() == NULL) context = text; - if (!StringPieceContains(context, text)) { - LOG(FATAL) << "Bad args: context does not contain text " - << reinterpret_cast<const void*>(context.begin()) - << "+" << context.size() << " " - << reinterpret_cast<const void*>(text.begin()) - << "+" << text.size(); + // Sanity check: make sure that text lies within context. + if (text.begin() < context.begin() || text.end() > context.end()) { + LOG(DFATAL) << "context does not contain text"; return false; } @@ -493,11 +484,10 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // For debugging prints. btext_ = context.begin(); - if (Debug) { + if (ExtraDebug) fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", text.ToString().c_str(), context.ToString().c_str(), anchored, longest); - } // Set up search. Threadq* runq = &q0_; @@ -537,7 +527,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, else flag |= kEmptyNonWordBoundary; - if (Debug) { + if (ExtraDebug) { int c = 0; if (p == context.begin()) c = '^'; @@ -629,7 +619,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // If all the threads have died, stop early. if (runq->size() == 0) { - if (Debug) + if (ExtraDebug) fprintf(stderr, "dead\n"); break; } @@ -645,12 +635,11 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, submatch[i] = StringPiece(match_[2 * i], static_cast<size_t>(match_[2 * i + 1] - match_[2 * i])); - if (Debug) + if (ExtraDebug) fprintf(stderr, "match (%td,%td)\n", match_[0] - btext_, match_[1] - btext_); return true; } - VLOG(1) << "No matches found"; return false; } @@ -719,7 +708,7 @@ bool Prog::SearchNFA(const StringPiece& text, const StringPiece& context, Anchor anchor, MatchKind kind, StringPiece* match, int nmatch) { - if (NFA::Debug) + if (ExtraDebug) Dump(); NFA nfa(this); diff --git a/re2/onepass.cc b/re2/onepass.cc index 65eb937..10dc642 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -72,7 +72,7 @@ namespace re2 { -static const int Debug = 0; +static const bool ExtraDebug = false; // The key insight behind this implementation is that the // non-determinism in an NFA for a one-pass regular expression @@ -460,7 +460,7 @@ bool Prog::IsOnePass() { int nextindex = nodebyid[ip->out()]; if (nextindex == -1) { if (nalloc >= maxnodes) { - if (Debug) + if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: hit node limit %d >= %d", nalloc, maxnodes); goto fail; @@ -485,10 +485,9 @@ bool Prog::IsOnePass() { if ((act & kImpossible) == kImpossible) { node->action[b] = newact; } else if (act != newact) { - if (Debug) { + if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: conflict on byte %#x at state %d", c, *it); - } goto fail; } } @@ -507,10 +506,9 @@ bool Prog::IsOnePass() { if ((act & kImpossible) == kImpossible) { node->action[b] = newact; } else if (act != newact) { - if (Debug) { + if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: conflict on byte %#x at state %d", c, *it); - } goto fail; } } @@ -549,10 +547,9 @@ bool Prog::IsOnePass() { // If already on work queue, (1) is violated: bail out. if (!AddQ(&workq, ip->out())) { - if (Debug) { + if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: multiple paths %d -> %d\n", *it, ip->out()); - } goto fail; } id = ip->out(); @@ -561,10 +558,9 @@ bool Prog::IsOnePass() { case kInstMatch: if (matched) { // (3) is violated - if (Debug) { + if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: multiple matches from %d\n", *it); - } goto fail; } matched = true; @@ -584,7 +580,7 @@ bool Prog::IsOnePass() { } } - if (Debug) { // For debugging, dump one-pass NFA to LOG(ERROR). + if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR). LOG(ERROR) << "bytemap:\n" << DumpByteMap(); LOG(ERROR) << "prog:\n" << Dump(); diff --git a/re2/prefilter.cc b/re2/prefilter.cc index eaf4a27..dbbbc97 100644 --- a/re2/prefilter.cc +++ b/re2/prefilter.cc @@ -19,7 +19,7 @@ namespace re2 { -static const int Trace = false; +static const bool ExtraDebug = false; typedef std::set<string>::iterator SSIter; typedef std::set<string>::const_iterator ConstSSIter; @@ -30,13 +30,10 @@ Prefilter::Prefilter(Op op) { subs_ = NULL; if (op_ == AND || op_ == OR) subs_ = new std::vector<Prefilter*>; - - VLOG(10) << "constructed: " << reinterpret_cast<intptr_t>(this); } // Destroys a Prefilter. Prefilter::~Prefilter() { - VLOG(10) << "destructing: " << reinterpret_cast<intptr_t>(this); if (subs_) { for (size_t i = 0; i < subs_->size(); i++) delete (*subs_)[i]; @@ -456,10 +453,10 @@ Prefilter::Info* Prefilter::Info::EmptyString() { typedef CharClass::iterator CCIter; Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, bool latin1) { - if (Trace) { - VLOG(0) << "CharClassInfo:"; + if (ExtraDebug) { + LOG(ERROR) << "CharClassInfo:"; for (CCIter i = cc->begin(); i != cc->end(); ++i) - VLOG(0) << " " << i->lo << "-" << i->hi; + LOG(ERROR) << " " << i->lo << "-" << i->hi; } // If the class is too large, it's okay to overestimate. @@ -479,9 +476,8 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, a->is_exact_ = true; - if (Trace) { - VLOG(0) << " = " << a->ToString(); - } + if (ExtraDebug) + LOG(ERROR) << " = " << a->ToString(); return a; } @@ -508,9 +504,8 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { }; Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { - if (Trace) { - LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); - } + if (ExtraDebug) + LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString(); bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; Prefilter::Info::Walker w(latin1); @@ -612,7 +607,6 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( info = child_args[0]; for (int i = 1; i < nchild_args; i++) info = Alt(info, child_args[i]); - VLOG(10) << "Alt: " << info->ToString(); break; case kRegexpStar: @@ -642,10 +636,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; } - if (Trace) { - VLOG(0) << "BuildInfo " << re->ToString() - << ": " << (info ? info->ToString() : ""); - } + if (ExtraDebug) + LOG(ERROR) << "BuildInfo " << re->ToString() + << ": " << (info ? info->ToString() : ""); return info; } diff --git a/re2/prefilter.h b/re2/prefilter.h index f400e4f..ead09e1 100644 --- a/re2/prefilter.h +++ b/re2/prefilter.h @@ -43,7 +43,7 @@ class Prefilter { // The children of the Prefilter node. std::vector<Prefilter*>* subs() { - CHECK(op_ == AND || op_ == OR); + DCHECK(op_ == AND || op_ == OR); return subs_; } diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc index 4238674..885cfb6 100644 --- a/re2/prefilter_tree.cc +++ b/re2/prefilter_tree.cc @@ -14,19 +14,22 @@ #include <vector> #include "util/util.h" -#include "util/flags.h" #include "util/logging.h" #include "re2/prefilter.h" #include "re2/re2.h" -DEFINE_int32(filtered_re2_min_atom_len, - 3, - "Strings less than this length are not stored as atoms"); - namespace re2 { +static const bool ExtraDebug = false; + PrefilterTree::PrefilterTree() - : compiled_(false) { + : compiled_(false), + min_atom_len_(3) { +} + +PrefilterTree::PrefilterTree(int min_atom_len) + : compiled_(false), + min_atom_len_(min_atom_len) { } PrefilterTree::~PrefilterTree() { @@ -37,62 +40,22 @@ PrefilterTree::~PrefilterTree() { delete entries_[i].parents; } -// Functions used for adding and Compiling prefilters to the -// PrefilterTree. -static bool KeepPart(Prefilter* prefilter, int level) { - if (prefilter == NULL) - return false; - - switch (prefilter->op()) { - default: - LOG(DFATAL) << "Unexpected op in KeepPart: " - << prefilter->op(); - return false; - - case Prefilter::ALL: - return false; - - case Prefilter::ATOM: - return prefilter->atom().size() >= - static_cast<size_t>(FLAGS_filtered_re2_min_atom_len); - - case Prefilter::AND: { - int j = 0; - std::vector<Prefilter*>* subs = prefilter->subs(); - for (size_t i = 0; i < subs->size(); i++) - if (KeepPart((*subs)[i], level + 1)) - (*subs)[j++] = (*subs)[i]; - else - delete (*subs)[i]; - - subs->resize(j); - return j > 0; - } - - case Prefilter::OR: - for (size_t i = 0; i < prefilter->subs()->size(); i++) - if (!KeepPart((*prefilter->subs())[i], level + 1)) - return false; - return true; - } -} - -void PrefilterTree::Add(Prefilter *f) { +void PrefilterTree::Add(Prefilter* prefilter) { if (compiled_) { - LOG(DFATAL) << "Add after Compile."; + LOG(DFATAL) << "Add called after Compile."; return; } - if (f != NULL && !KeepPart(f, 0)) { - delete f; - f = NULL; + if (prefilter != NULL && !KeepNode(prefilter)) { + delete prefilter; + prefilter = NULL; } - prefilter_vec_.push_back(f); + prefilter_vec_.push_back(prefilter); } void PrefilterTree::Compile(std::vector<string>* atom_vec) { if (compiled_) { - LOG(DFATAL) << "Compile after Compile."; + LOG(DFATAL) << "Compile called already."; return; } @@ -136,7 +99,8 @@ void PrefilterTree::Compile(std::vector<string>* atom_vec) { } } - PrintDebugInfo(); + if (ExtraDebug) + PrintDebugInfo(); } Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) { @@ -162,6 +126,42 @@ string PrefilterTree::NodeString(Prefilter* node) const { return s; } +bool PrefilterTree::KeepNode(Prefilter* node) const { + if (node == NULL) + return false; + + switch (node->op()) { + default: + LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op(); + return false; + + case Prefilter::ALL: + return false; + + case Prefilter::ATOM: + return node->atom().size() >= static_cast<size_t>(min_atom_len_); + + case Prefilter::AND: { + int j = 0; + std::vector<Prefilter*>* subs = node->subs(); + for (size_t i = 0; i < subs->size(); i++) + if (KeepNode((*subs)[i])) + (*subs)[j++] = (*subs)[i]; + else + delete (*subs)[i]; + + subs->resize(j); + return j > 0; + } + + case Prefilter::OR: + for (size_t i = 0; i < node->subs()->size(); i++) + if (!KeepNode((*node->subs())[i])) + return false; + return true; + } +} + void PrefilterTree::AssignUniqueIds(std::vector<string>* atom_vec) { atom_vec->clear(); @@ -294,7 +294,7 @@ void PrefilterTree::RegexpsGivenStrings( std::vector<int>* regexps) const { regexps->clear(); if (!compiled_) { - LOG(WARNING) << "Compile() not called"; + LOG(ERROR) << "RegexpsGivenStrings called before Compile."; for (size_t i = 0; i < prefilter_vec_.size(); ++i) regexps->push_back(static_cast<int>(i)); } else { @@ -303,7 +303,6 @@ void PrefilterTree::RegexpsGivenStrings( std::vector<int> matched_atom_ids; for (size_t j = 0; j < matched_atoms.size(); j++) { matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]); - VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]]; } PropagateMatch(matched_atom_ids, ®exps_map); for (IntMap::iterator it = regexps_map.begin(); @@ -325,12 +324,9 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, work.set(atom_ids[i], 1); for (IntMap::iterator it = work.begin(); it != work.end(); ++it) { const Entry& entry = entries_[it->index()]; - VLOG(10) << "Processing: " << it->index(); // Record regexps triggered. - for (size_t i = 0; i < entry.regexps.size(); i++) { - VLOG(10) << "Regexp triggered: " << entry.regexps[i]; + for (size_t i = 0; i < entry.regexps.size(); i++) regexps->set(entry.regexps[i], 1); - } int c; // Pass trigger up to parents. for (StdIntMap::iterator it = entry.parents->begin(); @@ -338,7 +334,6 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, ++it) { int j = it->first; const Entry& parent = entries_[j]; - VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count; // Delay until all the children have succeeded. if (parent.propagate_up_at_count > 1) { if (count.has_index(j)) { @@ -351,7 +346,6 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, if (c < parent.propagate_up_at_count) continue; } - VLOG(10) << "Triggering: " << j; // Trigger the parent. work.set(j, 1); } @@ -360,26 +354,26 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, // Debugging help. void PrefilterTree::PrintPrefilter(int regexpid) { - LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]); + LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]); } void PrefilterTree::PrintDebugInfo() { - VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size(); - VLOG(10) << "#Unique Nodes: " << entries_.size(); + LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size(); + LOG(ERROR) << "#Unique Nodes: " << entries_.size(); for (size_t i = 0; i < entries_.size(); ++i) { StdIntMap* parents = entries_[i].parents; const std::vector<int>& regexps = entries_[i].regexps; - VLOG(10) << "EntryId: " << i - << " N: " << parents->size() << " R: " << regexps.size(); + LOG(ERROR) << "EntryId: " << i + << " N: " << parents->size() << " R: " << regexps.size(); for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) - VLOG(10) << it->first; + LOG(ERROR) << it->first; } - VLOG(10) << "Map:"; + LOG(ERROR) << "Map:"; for (std::map<string, Prefilter*>::const_iterator iter = node_map_.begin(); iter != node_map_.end(); ++iter) - VLOG(10) << "NodeId: " << (*iter).second->unique_id() - << " Str: " << (*iter).first; + LOG(ERROR) << "NodeId: " << (*iter).second->unique_id() + << " Str: " << (*iter).first; } string PrefilterTree::DebugNodeString(Prefilter* node) const { diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h index 276d934..4c60801 100644 --- a/re2/prefilter_tree.h +++ b/re2/prefilter_tree.h @@ -33,6 +33,7 @@ class Prefilter; class PrefilterTree { public: PrefilterTree(); + explicit PrefilterTree(int min_atom_len); ~PrefilterTree(); // Adds the prefilter for the next regexp. Note that we assume that @@ -84,6 +85,9 @@ class PrefilterTree { }; private: + // Returns true if the prefilter node should be kept. + bool KeepNode(Prefilter* node) const; + // This function assigns unique ids to various parts of the // prefilter, by looking at if these nodes are already in the // PrefilterTree. @@ -127,6 +131,9 @@ class PrefilterTree { // Has the prefilter tree been compiled. bool compiled_; + // Strings less than this length are not stored as atoms. + const int min_atom_len_; + PrefilterTree(const PrefilterTree&) = delete; PrefilterTree& operator=(const PrefilterTree&) = delete; }; diff --git a/re2/prog.cc b/re2/prog.cc index bd83422..714bff8 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -114,14 +114,14 @@ Prog::Prog() list_count_(0), inst_(NULL), onepass_nodes_(NULL), + dfa_mem_(0), dfa_first_(NULL), - dfa_longest_(NULL), - dfa_mem_(0) { + dfa_longest_(NULL) { } Prog::~Prog() { - DeleteDFA(&dfa_first_); - DeleteDFA(&dfa_longest_); + DeleteDFA(dfa_longest_); + DeleteDFA(dfa_first_); delete[] onepass_nodes_; delete[] inst_; } @@ -190,9 +190,9 @@ string Prog::DumpByteMap() { } int Prog::first_byte() { - std::call_once(first_byte_once_, [this]() { - first_byte_ = ComputeFirstByte(); - }); + std::call_once(first_byte_once_, [](Prog* prog) { + prog->first_byte_ = prog->ComputeFirstByte(); + }, this); return first_byte_; } @@ -513,11 +513,11 @@ void Prog::ComputeByteMap() { builder.Build(bytemap_, &bytemap_range_); - if (0) { // For debugging: use trivial bytemap. + if (0) { // For debugging, use trivial bytemap. + LOG(ERROR) << "Using trivial bytemap."; for (int i = 0; i < 256; i++) bytemap_[i] = static_cast<uint8_t>(i); bytemap_range_ = 256; - LOG(INFO) << "Using trivial bytemap."; } } @@ -10,14 +10,12 @@ // expression symbolically. #include <stdint.h> -#include <atomic> #include <mutex> #include <string> #include <vector> #include "util/util.h" #include "util/logging.h" -#include "util/mutex.h" #include "util/sparse_array.h" #include "util/sparse_set.h" #include "re2/re2.h" @@ -264,6 +262,10 @@ class Prog { // for testing purposes. Returns number of states. int BuildEntireDFA(MatchKind kind); + // Controls whether the DFA should bail out early if the NFA would be faster. + // FOR TESTING ONLY. + static void TEST_dfa_should_bail_when_slow(bool b); + // Compute bytemap. void ComputeByteMap(); @@ -345,7 +347,7 @@ class Prog { friend class Compiler; DFA* GetDFA(MatchKind kind); - void DeleteDFA(std::atomic<DFA*>* pdfa); + void DeleteDFA(DFA* dfa); bool anchor_start_; // regexp has explicit start anchor bool anchor_end_; // regexp has explicit end anchor @@ -366,14 +368,15 @@ class Prog { Inst* inst_; // pointer to instruction array uint8_t* onepass_nodes_; // data for OnePass nodes - Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_ - std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch - std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch/kFullMatch - int64_t dfa_mem_; // Maximum memory for DFAs. + int64_t dfa_mem_; // Maximum memory for DFAs. + DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch + DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch uint8_t bytemap_[256]; // map from input bytes to byte classes std::once_flag first_byte_once_; + std::once_flag dfa_first_once_; + std::once_flag dfa_longest_once_; Prog(const Prog&) = delete; Prog& operator=(const Prog&) = delete; @@ -23,7 +23,6 @@ #include <vector> #include "util/util.h" -#include "util/flags.h" #include "util/logging.h" #include "util/sparse_array.h" #include "util/strutil.h" @@ -31,17 +30,12 @@ #include "re2/prog.h" #include "re2/regexp.h" -DEFINE_bool(trace_re2, false, "trace RE2 execution"); - namespace re2 { // Maximum number of args we can set static const int kMaxArgs = 16; static const int kVecSize = 1+kMaxArgs; -#ifdef _MSC_VER -__declspec(selectany) -#endif const int RE2::Options::kDefaultMaxMem; // initialized in re2.h RE2::Options::Options(RE2::CannedOptions opt) @@ -236,15 +230,16 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { // Returns rprog_, computing it if needed. re2::Prog* RE2::ReverseProg() const { - std::call_once(rprog_once_, [this]() { - rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3); - if (rprog_ == NULL) { - if (options_.log_errors()) - LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'"; - error_ = new string("pattern too large - reverse compile failed"); - error_code_ = RE2::ErrorPatternTooLarge; + std::call_once(rprog_once_, [](const RE2* re) { + re->rprog_ = + re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3); + if (re->rprog_ == NULL) { + if (re->options_.log_errors()) + LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'"; + re->error_ = new string("pattern too large - reverse compile failed"); + re->error_code_ = RE2::ErrorPatternTooLarge; } - }); + }, this); return rprog_; } @@ -289,32 +284,32 @@ int RE2::ProgramFanout(std::map<int, int>* histogram) const { // Returns num_captures_, computing it if needed, or -1 if the // regexp wasn't valid on construction. int RE2::NumberOfCapturingGroups() const { - std::call_once(num_captures_once_, [this]() { - if (suffix_regexp_ != NULL) - num_captures_ = suffix_regexp_->NumCaptures(); - }); + std::call_once(num_captures_once_, [](const RE2* re) { + if (re->suffix_regexp_ != NULL) + re->num_captures_ = re->suffix_regexp_->NumCaptures(); + }, this); return num_captures_; } // Returns named_groups_, computing it if needed. const std::map<string, int>& RE2::NamedCapturingGroups() const { - std::call_once(named_groups_once_, [this]() { - if (suffix_regexp_ != NULL) - named_groups_ = suffix_regexp_->NamedCaptures(); - if (named_groups_ == NULL) - named_groups_ = empty_named_groups; - }); + std::call_once(named_groups_once_, [](const RE2* re) { + if (re->suffix_regexp_ != NULL) + re->named_groups_ = re->suffix_regexp_->NamedCaptures(); + if (re->named_groups_ == NULL) + re->named_groups_ = empty_named_groups; + }, this); return *named_groups_; } // Returns group_names_, computing it if needed. const std::map<int, string>& RE2::CapturingGroupNames() const { - std::call_once(group_names_once_, [this]() { - if (suffix_regexp_ != NULL) - group_names_ = suffix_regexp_->CaptureNames(); - if (group_names_ == NULL) - group_names_ = empty_group_names; - }); + std::call_once(group_names_once_, [](const RE2* re) { + if (re->suffix_regexp_ != NULL) + re->group_names_ = re->suffix_regexp_->CaptureNames(); + if (re->group_names_ == NULL) + re->group_names_ = empty_group_names; + }, this); return *group_names_; } @@ -668,24 +663,16 @@ bool RE2::Match(const StringPiece& text, if (!prog_->SearchDFA(subtext, text, anchor, kind, matchp, &dfa_failed, NULL)) { if (dfa_failed) { + if (options_.log_errors()) + LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " + << "bytemap range " << prog_->bytemap_range() << ", " + << "list count " << prog_->list_count(); // Fall back to NFA below. skipped_test = true; - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " DFA failed."; break; } - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " used DFA - no match."; return false; } - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " used DFA - match"; if (matchp == NULL) // Matched. Don't care where return true; // SearchDFA set match[0].end() but didn't know where the @@ -697,26 +684,18 @@ bool RE2::Match(const StringPiece& text, if (!prog->SearchDFA(match, text, Prog::kAnchored, Prog::kLongestMatch, &match, &dfa_failed, NULL)) { if (dfa_failed) { + if (options_.log_errors()) + LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " + << "bytemap range " << prog_->bytemap_range() << ", " + << "list count " << prog_->list_count(); // Fall back to NFA below. skipped_test = true; - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " reverse DFA failed."; break; } - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " DFA inconsistency."; if (options_.log_errors()) - LOG(ERROR) << "DFA inconsistency"; + LOG(ERROR) << "SearchDFA inconsistency"; return false; } - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " used reverse DFA."; break; } @@ -735,35 +714,24 @@ bool RE2::Match(const StringPiece& text, // the DFA does. if (can_one_pass && text.size() <= 4096 && (ncap > 1 || text.size() <= 8)) { - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " skipping DFA for OnePass."; skipped_test = true; break; } if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) { - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " skipping DFA for BitState."; skipped_test = true; break; } if (!prog_->SearchDFA(subtext, text, anchor, kind, &match, &dfa_failed, NULL)) { if (dfa_failed) { - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " DFA failed."; + if (options_.log_errors()) + LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " + << "bytemap range " << prog_->bytemap_range() << ", " + << "list count " << prog_->list_count(); + // Fall back to NFA below. skipped_test = true; break; } - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " used DFA - no match."; return false; } break; @@ -789,20 +757,12 @@ bool RE2::Match(const StringPiece& text, } if (can_one_pass && anchor != Prog::kUnanchored) { - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " using OnePass."; if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { if (!skipped_test && options_.log_errors()) LOG(ERROR) << "SearchOnePass inconsistency"; return false; } } else if (can_bit_state && subtext1.size() <= bit_state_text_max) { - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " using BitState."; if (!prog_->SearchBitState(subtext1, text, anchor, kind, submatch, ncap)) { if (!skipped_test && options_.log_errors()) @@ -810,10 +770,6 @@ bool RE2::Match(const StringPiece& text, return false; } } else { - if (FLAGS_trace_re2) - LOG(INFO) << "Match " << trunc(pattern_) - << " [" << CEscape(subtext) << "]" - << " using NFA."; if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { if (!skipped_test && options_.log_errors()) LOG(ERROR) << "SearchNFA inconsistency"; @@ -880,7 +836,6 @@ bool RE2::DoMatch(const StringPiece& text, int ncap = NumberOfCapturingGroups(); if (ncap < n) { // RE has fewer capturing groups than number of arg pointers passed in - VLOG(1) << "Asked for " << n << " but only have " << ncap; delete[] heapvec; return false; } @@ -890,8 +845,6 @@ bool RE2::DoMatch(const StringPiece& text, const StringPiece& s = vec[i+1]; if (!args[i]->Parse(s.data(), s.size())) { // TODO: Should we indicate what the error was? - VLOG(1) << "Parse error on #" << i << " " << s << " " - << (void*)s.data() << "/" << s.size(); delete[] heapvec; return false; } @@ -905,7 +905,7 @@ class LazyRE2 { // Named accessor/initializer: RE2* get() const { - std::call_once(once_, [this]() { LazyRE2::Init(this); }); + std::call_once(once_, &LazyRE2::Init, this); return ptr_; } diff --git a/re2/regexp.h b/re2/regexp.h index c11c2c1..e77d0d8 100644 --- a/re2/regexp.h +++ b/re2/regexp.h @@ -497,8 +497,7 @@ class Regexp { // Allocate space for n sub-regexps. void AllocSub(int n) { - if (n < 0 || static_cast<uint16_t>(n) != n) - LOG(FATAL) << "Cannot AllocSub " << n; + DCHECK(n >= 0 && static_cast<uint16_t>(n) == n); if (n > 1) submany_ = new Regexp*[n]; nsub_ = n; @@ -101,12 +101,16 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { } if (v != NULL) v->clear(); - bool failed; + bool dfa_failed = false; bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, - Prog::kManyMatch, NULL, &failed, v); - if (failed) - LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space"; - + Prog::kManyMatch, NULL, &dfa_failed, v); + if (dfa_failed) { + if (options_.log_errors()) + LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " + << "bytemap range " << prog_->bytemap_range() << ", " + << "list count " << prog_->list_count(); + return false; + } if (ret == false) return false; if (v != NULL && v->empty()) { diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc index 94c2bcb..d25351f 100644 --- a/re2/stringpiece.cc +++ b/re2/stringpiece.cc @@ -10,9 +10,6 @@ using re2::StringPiece; -#ifdef _MSC_VER -__declspec(selectany) -#endif const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h StringPiece::size_type StringPiece::copy(char* buf, size_type n, diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc index 2051683..e56b3e0 100644 --- a/re2/testing/dfa_test.cc +++ b/re2/testing/dfa_test.cc @@ -18,8 +18,6 @@ static const bool UsingMallocCounter = false; -DECLARE_bool(re2_dfa_bail_when_slow); - DEFINE_int32(size, 8, "log2(number of DFA nodes)"); DEFINE_int32(repeat, 2, "Repetition count."); DEFINE_int32(threads, 4, "number of threads"); @@ -77,15 +75,10 @@ TEST(Multithreaded, BuildEntireDFA) { // the DFA once the memory limits are reached. TEST(SingleThreaded, BuildEntireDFA) { // Create regexp with 2^30 states in DFA. - string s = "a"; - for (int i = 0; i < 30; i++) - s += "[ab]"; - s += "b"; - - Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL); + Regexp* re = Regexp::Parse("a[ab]{30}b", Regexp::LikePerl, NULL); CHECK(re); - int max = 24; - for (int i = 17; i < max; i++) { + + for (int i = 17; i < 24; i++) { int64_t limit = 1<<i; int64_t usage; //int64_t progusage, dfamem; @@ -100,15 +93,15 @@ TEST(SingleThreaded, BuildEntireDFA) { usage = m.HeapGrowth(); delete prog; } - if (!UsingMallocCounter) - continue; - //LOG(INFO) << "limit " << limit << ", " - // << "prog usage " << progusage << ", " - // << "DFA budget " << dfamem << ", " - // << "total " << usage; - // Tolerate +/- 10%. - CHECK_GT(usage, limit*9/10); - CHECK_LT(usage, limit*11/10); + if (UsingMallocCounter) { + //LOG(INFO) << "limit " << limit << ", " + // << "prog usage " << progusage << ", " + // << "DFA budget " << dfamem << ", " + // << "total " << usage; + // Tolerate +/- 10%. + CHECK_GT(usage, limit*9/10); + CHECK_LT(usage, limit*11/10); + } } re->Decref(); } @@ -166,6 +159,14 @@ static string DeBruijnString(int n) { // 2^n byte limit, it must be handling out-of-memory conditions // gracefully. TEST(SingleThreaded, SearchDFA) { + // The De Bruijn string is the worst case input for this regexp. + // By default, the DFA will notice that it is flushing its cache + // too frequently and will bail out early, so that RE2 can use the + // NFA implementation instead. (The DFA loses its speed advantage + // if it can't get a good cache hit rate.) + // Tell the DFA to trudge along instead. + Prog::TEST_dfa_should_bail_when_slow(false); + // Choice of n is mostly arbitrary, except that: // * making n too big makes the test run for too long. // * making n too small makes the DFA refuse to run, @@ -182,14 +183,6 @@ TEST(SingleThreaded, SearchDFA) { string no_match = DeBruijnString(n); string match = no_match + "0"; - // The De Bruijn string is the worst case input for this regexp. - // By default, the DFA will notice that it is flushing its cache - // too frequently and will bail out early, so that RE2 can use the - // NFA implementation instead. (The DFA loses its speed advantage - // if it can't get a good cache hit rate.) - // Tell the DFA to trudge along instead. - FLAGS_re2_dfa_bail_when_slow = false; - int64_t usage; int64_t peak_usage; { @@ -197,7 +190,8 @@ TEST(SingleThreaded, SearchDFA) { Prog* prog = re->CompileToProg(1<<n); CHECK(prog); for (int i = 0; i < 10; i++) { - bool matched, failed = false; + bool matched = false; + bool failed = false; matched = prog->SearchDFA(match, NULL, Prog::kUnanchored, Prog::kFirstMatch, NULL, &failed, NULL); @@ -213,13 +207,16 @@ TEST(SingleThreaded, SearchDFA) { peak_usage = m.PeakHeapGrowth(); delete prog; } - if (!UsingMallocCounter) - return; - //LOG(INFO) << "usage " << usage << ", " - // << "peak usage " << peak_usage; - CHECK_LT(usage, 1<<n); - CHECK_LT(peak_usage, 1<<n); + if (UsingMallocCounter) { + //LOG(INFO) << "usage " << usage << ", " + // << "peak usage " << peak_usage; + CHECK_LT(usage, 1<<n); + CHECK_LT(peak_usage, 1<<n); + } re->Decref(); + + // Reset to original behaviour. + Prog::TEST_dfa_should_bail_when_slow(true); } // Helper function: searches for match, which should match, @@ -243,6 +240,8 @@ static void DoSearch(Prog* prog, const StringPiece& match, } TEST(Multithreaded, SearchDFA) { + Prog::TEST_dfa_should_bail_when_slow(false); + // Same as single-threaded test above. const int n = 18; Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n), @@ -250,7 +249,6 @@ TEST(Multithreaded, SearchDFA) { CHECK(re); string no_match = DeBruijnString(n); string match = no_match + "0"; - FLAGS_re2_dfa_bail_when_slow = false; // Check that single-threaded code works. { @@ -279,6 +277,9 @@ TEST(Multithreaded, SearchDFA) { } re->Decref(); + + // Reset to original behaviour. + Prog::TEST_dfa_should_bail_when_slow(true); } struct ReverseTest { diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index c3b2a3c..867eac6 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -13,11 +13,12 @@ #include "re2/filtered_re2.h" #include "re2/re2.h" -DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc - namespace re2 { struct FilterTestVars { + FilterTestVars() {} + explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {} + std::vector<string> atoms; std::vector<int> atom_indices; std::vector<int> matches; @@ -27,14 +28,20 @@ struct FilterTestVars { TEST(FilteredRE2Test, EmptyTest) { FilterTestVars v; + + v.f.Compile(&v.atoms); + EXPECT_EQ(0, v.atoms.size()); + + // Compile has no effect at all when called before Add: it will not + // record that it has been called and it will not clear the vector. + // The second point does not matter here, but the first point means + // that an error will be logged during the call to AllMatches. v.f.AllMatches("foo", v.atom_indices, &v.matches); EXPECT_EQ(0, v.matches.size()); } TEST(FilteredRE2Test, SmallOrTest) { - FLAGS_filtered_re2_min_atom_len = 4; - - FilterTestVars v; + FilterTestVars v(4); // override the minimum atom length int id; v.f.Add("(foo|bar)", v.opts, &id); @@ -47,7 +54,6 @@ TEST(FilteredRE2Test, SmallOrTest) { } TEST(FilteredRE2Test, SmallLatinTest) { - FLAGS_filtered_re2_min_atom_len = 3; FilterTestVars v; int id; @@ -163,21 +169,19 @@ bool CheckExpectedAtoms(const char* atoms[], pass = pass && expected[i] == v->atoms[i]; if (!pass) { - LOG(WARNING) << "Failed " << testname; - LOG(WARNING) << "Expected #atoms = " << expected.size(); + LOG(ERROR) << "Failed " << testname; + LOG(ERROR) << "Expected #atoms = " << expected.size(); for (size_t i = 0; i < expected.size(); i++) - LOG(WARNING) << expected[i]; - LOG(WARNING) << "Found #atoms = " << v->atoms.size(); + LOG(ERROR) << expected[i]; + LOG(ERROR) << "Found #atoms = " << v->atoms.size(); for (size_t i = 0; i < v->atoms.size(); i++) - LOG(WARNING) << v->atoms[i]; + LOG(ERROR) << v->atoms[i]; } return pass; } TEST(FilteredRE2Test, AtomTests) { - FLAGS_filtered_re2_min_atom_len = 3; - int nfail = 0; for (int i = 0; i < arraysize(atom_tests); i++) { FilterTestVars v; @@ -211,8 +215,6 @@ void FindAtomIndices(const std::vector<string>& atoms, } TEST(FilteredRE2Test, MatchEmptyPattern) { - FLAGS_filtered_re2_min_atom_len = 3; - FilterTestVars v; AtomTest* t = &atom_tests[0]; // We are using the regexps used in one of the atom tests @@ -231,8 +233,6 @@ TEST(FilteredRE2Test, MatchEmptyPattern) { } TEST(FilteredRE2Test, MatchTests) { - FLAGS_filtered_re2_min_atom_len = 3; - FilterTestVars v; AtomTest* t = &atom_tests[2]; // We are using the regexps used in one of the atom tests @@ -4,7 +4,7 @@ success=true for i do printf "%-40s" $i - if sh -c "$i >$i.log 2>&1" 2>/dev/null + if $($i >$i.log 2>&1) 2>/dev/null then echo PASS else diff --git a/util/benchmark.cc b/util/benchmark.cc index 5ca715a..125bbe3 100644 --- a/util/benchmark.cc +++ b/util/benchmark.cc @@ -4,6 +4,7 @@ #include <stdint.h> #include <stdio.h> +#include <stdlib.h> #include <algorithm> #include <chrono> @@ -80,7 +81,7 @@ static void runN(Benchmark *b, int n, int siz) { b->fnr(n, siz); else { fprintf(stderr, "%s: missing function\n", b->name); - exit(2); + abort(); } if(t0 != 0) ns += nsec() - t0; diff --git a/util/logging.cc b/util/logging.cc deleted file mode 100644 index 8a59862..0000000 --- a/util/logging.cc +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright 2015 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "util/logging.h" - -DEFINE_int32(minloglevel, 0, // INFO - "Messages logged at a lower level than this don't actually get " - "logged anywhere"); diff --git a/util/logging.h b/util/logging.h index d1044b3..e9aa446 100644 --- a/util/logging.h +++ b/util/logging.h @@ -8,14 +8,12 @@ // Simplified version of Google's logging. #include <assert.h> -#include <stdio.h> /* for fwrite */ +#include <stdio.h> +#include <stdlib.h> #include <ostream> #include <sstream> #include "util/util.h" -#include "util/flags.h" - -DECLARE_int32(minloglevel); // Debug-only checking. #define DCHECK(condition) assert(condition) @@ -35,9 +33,9 @@ DECLARE_int32(minloglevel); #define CHECK_EQ(x, y) CHECK((x) == (y)) #define CHECK_NE(x, y) CHECK((x) != (y)) -#define LOG_INFO LogMessage(__FILE__, __LINE__, 0) -#define LOG_WARNING LogMessage(__FILE__, __LINE__, 1) -#define LOG_ERROR LogMessage(__FILE__, __LINE__, 2) +#define LOG_INFO LogMessage(__FILE__, __LINE__) +#define LOG_WARNING LogMessage(__FILE__, __LINE__) +#define LOG_ERROR LogMessage(__FILE__, __LINE__) #define LOG_FATAL LogMessageFatal(__FILE__, __LINE__) #define LOG_QFATAL LOG_FATAL @@ -58,17 +56,15 @@ DECLARE_int32(minloglevel); class LogMessage { public: - LogMessage(const char* file, int line, int severity) - : severity_(severity), flushed_(false) { + LogMessage(const char* file, int line) + : flushed_(false) { stream() << file << ":" << line << ": "; } void Flush() { stream() << "\n"; - if (severity_ >= re2::FLAGS_minloglevel) { - string s = str_.str(); - size_t n = s.size(); - if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc - } + string s = str_.str(); + size_t n = s.size(); + if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc flushed_ = true; } ~LogMessage() { @@ -79,7 +75,6 @@ class LogMessage { std::ostream& stream() { return str_; } private: - const int severity_; bool flushed_; std::ostringstream str_; @@ -97,7 +92,7 @@ class LogMessage { class LogMessageFatal : public LogMessage { public: LogMessageFatal(const char* file, int line) - : LogMessage(file, line, 3) {} + : LogMessage(file, line) {} ~LogMessageFatal() { Flush(); abort(); @@ -10,6 +10,13 @@ namespace re2 { +// Silence "truncation of constant value" warning for kMul in 32-bit mode. +// Since this is a header file, push and then pop to limit the scope. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4309) +#endif + class HashMix { public: HashMix() : hash_(1) {} @@ -25,6 +32,10 @@ class HashMix { size_t hash_; }; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } // namespace re2 #endif // UTIL_MIX_H_ diff --git a/util/pcre.cc b/util/pcre.cc index 2d0f5df..fab0388 100644 --- a/util/pcre.cc +++ b/util/pcre.cc @@ -21,6 +21,13 @@ #include "util/pcre.h" #include "util/strutil.h" +// Silence warnings about the wacky formatting in the operator() functions. +// Note that we test for Clang first because it defines __GNUC__ as well. +#if defined(__clang__) +#elif defined(__GNUC__) && __GNUC__ >= 6 +#pragma GCC diagnostic ignored "-Wmisleading-indentation" +#endif + #define PCREPORT(level) LOG(level) // Default PCRE limits. @@ -730,10 +737,14 @@ int PCRE::NumberOfCapturingGroups() const { if (re_partial_ == NULL) return -1; int result; - CHECK(pcre_fullinfo(re_partial_, // The regular expression object - NULL, // We did not study the pattern - PCRE_INFO_CAPTURECOUNT, - &result) == 0); + int rc = pcre_fullinfo(re_partial_, // The regular expression object + NULL, // We did not study the pattern + PCRE_INFO_CAPTURECOUNT, + &result); + if (rc != 0) { + PCREPORT(ERROR) << "Unexpected return code: " << rc; + return -1; + } return result; } diff --git a/util/sparse_array.h b/util/sparse_array.h index 3b651cd..b9853a7 100644 --- a/util/sparse_array.h +++ b/util/sparse_array.h @@ -94,6 +94,7 @@ // // A moved-from SparseArray will be empty. +#include <assert.h> #include <stdint.h> #include <string.h> #include <algorithm> @@ -101,9 +102,6 @@ #include <utility> #include <vector> -#include "util/util.h" -#include "util/logging.h" - namespace re2 { template<typename Value> @@ -121,10 +119,10 @@ class SparseArray { typedef typename std::vector<IndexValue>::const_iterator const_iterator; SparseArray(const SparseArray& src); - SparseArray(SparseArray&& src) noexcept; + SparseArray(SparseArray&& src) /*noexcept*/; SparseArray& operator=(const SparseArray& src); - SparseArray& operator=(SparseArray&& src) noexcept; + SparseArray& operator=(SparseArray&& src) /*noexcept*/; const IndexValue& iv(int i) const; @@ -275,16 +273,14 @@ class SparseArray { iterator SetInternal(bool allow_overwrite, int i, U&& v) { // NOLINT DebugCheckInvariants(); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { - LOG(DFATAL) << "(jyasskin) Illegal index " << i - << " passed to SparseArray(" << max_size_ - << ").set" << (allow_overwrite?"":"_new") << "()."; + assert(!"illegal index"); // Semantically, end() would be better here, but we already know // the user did something stupid, so begin() insulates them from // dereferencing an invalid pointer. return begin(); } if (!allow_overwrite) { - DCHECK(!has_index(i)); + assert(!has_index(i)); create_index(i); } else { if (!has_index(i)) @@ -296,7 +292,7 @@ class SparseArray { template <typename U> iterator SetExistingInternal(int i, U&& v) { // NOLINT DebugCheckInvariants(); - DCHECK(has_index(i)); + assert(has_index(i)); dense_[sparse_to_dense_[i]].value() = std::forward<U>(v); DebugCheckInvariants(); return dense_.begin() + sparse_to_dense_[i]; @@ -333,7 +329,7 @@ SparseArray<Value>::SparseArray(const SparseArray& src) } template<typename Value> -SparseArray<Value>::SparseArray(SparseArray&& src) noexcept // NOLINT +SparseArray<Value>::SparseArray(SparseArray&& src) /*noexcept*/ // NOLINT : size_(src.size_), max_size_(src.max_size_), sparse_to_dense_(std::move(src.sparse_to_dense_)), @@ -356,7 +352,7 @@ SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) { template<typename Value> SparseArray<Value>& SparseArray<Value>::operator=( - SparseArray&& src) noexcept { // NOLINT + SparseArray&& src) /*noexcept*/ { // NOLINT size_ = src.size_; max_size_ = src.max_size_; sparse_to_dense_ = std::move(src.sparse_to_dense_); @@ -382,9 +378,9 @@ class SparseArray<Value>::IndexValue { int index() const { return index_; } - Value& value() & { return second; } - const Value& value() const & { return second; } - Value&& value() && { return std::move(second); } // NOLINT + Value& value() /*&*/ { return second; } + const Value& value() const /*&*/ { return second; } + //Value&& value() /*&&*/ { return std::move(second); } // NOLINT private: int index_; @@ -400,8 +396,8 @@ class SparseArray<Value>::IndexValue { template<typename Value> const typename SparseArray<Value>::IndexValue& SparseArray<Value>::iv(int i) const { - DCHECK_GE(i, 0); - DCHECK_LT(i, size_); + assert(i >= 0); + assert(i < size_); return dense_[i]; } @@ -435,8 +431,8 @@ void SparseArray<Value>::resize(int max_size) { // Check whether index i is in the array. template<typename Value> bool SparseArray<Value>::has_index(int i) const { - DCHECK_GE(i, 0); - DCHECK_LT(i, max_size_); + assert(i >= 0); + assert(i < max_size_); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { return false; } @@ -447,7 +443,7 @@ bool SparseArray<Value>::has_index(int i) const { template<typename Value> const Value& SparseArray<Value>::get_existing(int i) const { - DCHECK(has_index(i)); + assert(has_index(i)); return dense_[sparse_to_dense_[i]].second; } @@ -462,7 +458,7 @@ void SparseArray<Value>::erase(int i) { template<typename Value> void SparseArray<Value>::erase_existing(int i) { DebugCheckInvariants(); - DCHECK(has_index(i)); + assert(has_index(i)); int di = sparse_to_dense_[i]; if (di < size_ - 1) { dense_[di] = std::move(dense_[size_ - 1]); @@ -474,8 +470,8 @@ void SparseArray<Value>::erase_existing(int i) { template<typename Value> void SparseArray<Value>::create_index(int i) { - DCHECK(!has_index(i)); - DCHECK_LT(size_, max_size_); + assert(!has_index(i)); + assert(size_ < max_size_); sparse_to_dense_[i] = size_; dense_[size_].index_ = i; size_++; @@ -502,9 +498,9 @@ template<typename Value> SparseArray<Value>::~SparseArray() { } template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const { - DCHECK_LE(0, size_); - DCHECK_LE(size_, max_size_); - DCHECK(size_ == 0 || sparse_to_dense_ != NULL); + assert(0 <= size_); + assert(size_ <= max_size_); + assert(size_ == 0 || sparse_to_dense_ != NULL); } // Comparison function for sorting. diff --git a/util/sparse_set.h b/util/sparse_set.h index c52ab74..b431643 100644 --- a/util/sparse_set.h +++ b/util/sparse_set.h @@ -47,6 +47,7 @@ // // See sparse_array.h for implementation details. +#include <assert.h> #include <stdint.h> #include <string.h> #include <algorithm> @@ -54,9 +55,6 @@ #include <utility> #include <vector> -#include "util/util.h" -#include "util/logging.h" - namespace re2 { template<typename Value> @@ -134,16 +132,14 @@ class SparseSetT { iterator InsertInternal(bool allow_existing, int i) { DebugCheckInvariants(); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { - LOG(DFATAL) << "(jyasskin) Illegal index " << i - << " passed to SparseSet(" << max_size_ - << ").insert" << (allow_existing?"":"_new") << "()."; + assert(!"illegal index"); // Semantically, end() would be better here, but we already know // the user did something stupid, so begin() insulates them from // dereferencing an invalid pointer. return begin(); } if (!allow_existing) { - DCHECK(!contains(i)); + assert(!contains(i)); create_index(i); } else { if (!contains(i)) @@ -203,8 +199,8 @@ void SparseSetT<Value>::resize(int max_size) { // Check whether index i is in the set. template<typename Value> bool SparseSetT<Value>::contains(int i) const { - DCHECK_GE(i, 0); - DCHECK_LT(i, max_size_); + assert(i >= 0); + assert(i < max_size_); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) { return false; } @@ -215,8 +211,8 @@ bool SparseSetT<Value>::contains(int i) const { template<typename Value> void SparseSetT<Value>::create_index(int i) { - DCHECK(!contains(i)); - DCHECK_LT(size_, max_size_); + assert(!contains(i)); + assert(size_ < max_size_); sparse_to_dense_[i] = size_; dense_[size_] = i; size_++; @@ -243,9 +239,9 @@ template<typename Value> SparseSetT<Value>::~SparseSetT() { } template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const { - DCHECK_LE(0, size_); - DCHECK_LE(size_, max_size_); - DCHECK(size_ == 0 || sparse_to_dense_ != NULL); + assert(0 <= size_); + assert(size_ <= max_size_); + assert(size_ == 0 || sparse_to_dense_ != NULL); } // Comparison function for sorting. |