summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2016-11-21 16:59:50 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2016-11-21 16:59:50 +0900
commita96ee296357f1a382ea43fe4ca4f633d49e922cf (patch)
treefef7122e14840a504d4461423e3ca6512741f152
parent9408cea9ad30ce73c37268ab8e86ef2cededfadf (diff)
downloadre2-a96ee296357f1a382ea43fe4ca4f633d49e922cf.tar.gz
re2-a96ee296357f1a382ea43fe4ca4f633d49e922cf.tar.bz2
re2-a96ee296357f1a382ea43fe4ca4f633d49e922cf.zip
Imported Upstream version 20161001upstream/20161001
Change-Id: I4e7b71f50654c54bfd112ed1cb774c9c6bb732ea Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r--BUILD1
-rw-r--r--CMakeLists.txt5
-rw-r--r--Makefile1
-rwxr-xr-xkokoro/windows/continuous-cmake.bat14
-rw-r--r--libre2.symbols2
-rw-r--r--libre2.symbols.darwin2
-rw-r--r--re2/bitstate.cc5
-rw-r--r--re2/compile.cc6
-rw-r--r--re2/dfa.cc144
-rw-r--r--re2/filtered_re2.cc16
-rw-r--r--re2/filtered_re2.h1
-rw-r--r--re2/fuzzing/re2_fuzzer.cc5
-rw-r--r--re2/nfa.cc35
-rw-r--r--re2/onepass.cc18
-rw-r--r--re2/prefilter.cc29
-rw-r--r--re2/prefilter.h2
-rw-r--r--re2/prefilter_tree.cc136
-rw-r--r--re2/prefilter_tree.h7
-rw-r--r--re2/prog.cc18
-rw-r--r--re2/prog.h17
-rw-r--r--re2/re2.cc125
-rw-r--r--re2/re2.h2
-rw-r--r--re2/regexp.h3
-rw-r--r--re2/set.cc14
-rw-r--r--re2/stringpiece.cc3
-rw-r--r--re2/testing/dfa_test.cc71
-rw-r--r--re2/testing/filtered_re2_test.cc34
-rwxr-xr-xruntests2
-rw-r--r--util/benchmark.cc3
-rw-r--r--util/logging.cc9
-rw-r--r--util/logging.h27
-rw-r--r--util/mix.h11
-rw-r--r--util/pcre.cc19
-rw-r--r--util/sparse_array.h48
-rw-r--r--util/sparse_set.h24
35 files changed, 375 insertions, 484 deletions
diff --git a/BUILD b/BUILD
index 1a8aecb..f4abb0a 100644
--- a/BUILD
+++ b/BUILD
@@ -40,7 +40,6 @@ cc_library(
"re2/unicode_groups.h",
"re2/walker-inl.h",
"util/flags.h",
- "util/logging.cc",
"util/logging.h",
"util/mix.h",
"util/mutex.h",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bf70d59..3c90f5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,7 +18,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
message(FATAL_ERROR "you need Visual Studio 2013 or later")
endif()
if(BUILD_SHARED_LIBS)
- message(FATAL_ERROR "building a DLL is not currently supported")
+ # See http://www.kitware.com/blog/home/post/939 for details.
+ cmake_minimum_required(VERSION 3.4)
+ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-std=c++11)
@@ -60,7 +62,6 @@ set(RE2_SOURCES
re2/tostring.cc
re2/unicode_casefold.cc
re2/unicode_groups.cc
- util/logging.cc
util/rune.cc
util/strutil.cc
)
diff --git a/Makefile b/Makefile
index 56ef560..645b364 100644
--- a/Makefile
+++ b/Makefile
@@ -107,7 +107,6 @@ HFILES=\
re2/walker-inl.h\
OFILES=\
- obj/util/logging.o\
obj/util/rune.o\
obj/util/strutil.o\
obj/re2/bitstate.o\
diff --git a/kokoro/windows/continuous-cmake.bat b/kokoro/windows/continuous-cmake.bat
index 8f0f293..b6d2f6f 100755
--- a/kokoro/windows/continuous-cmake.bat
+++ b/kokoro/windows/continuous-cmake.bat
@@ -1,11 +1,11 @@
-CD git/re2 || EXIT /B 1
+CD git/re2 || EXIT /B 1
-cmake -D CMAKE_BUILD_TYPE=Debug -A x64 . || EXIT /B 1
-cmake --build . --config Debug --clean-first || EXIT /B 1
-ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1
+cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1
+cmake --build . --config Debug --clean-first || EXIT /B 1
+ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1
-cmake -D CMAKE_BUILD_TYPE=Release -A x64 . || EXIT /B 1
-cmake --build . --config Release --clean-first || EXIT /B 1
-ctest -C Release --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1
+cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1
+cmake --build . --config Release --clean-first || EXIT /B 1
+ctest -C Release --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1
EXIT /B 0
diff --git a/libre2.symbols b/libre2.symbols
index 90a1020..1224baa 100644
--- a/libre2.symbols
+++ b/libre2.symbols
@@ -11,8 +11,6 @@
# re2::FilteredRE2*
_ZN3re211FilteredRE2*;
_ZNK3re211FilteredRE2*;
- # flags
- _ZN3re2*FLAGS_*;
local:
*;
};
diff --git a/libre2.symbols.darwin b/libre2.symbols.darwin
index 4207f87..a4d942f 100644
--- a/libre2.symbols.darwin
+++ b/libre2.symbols.darwin
@@ -15,5 +15,3 @@ __Zls*RKN3re211StringPieceE
# re2::FilteredRE2*
__ZN3re211FilteredRE2*
__ZNK3re211FilteredRE2*
-# flags
-__ZN3re2*FLAGS_*
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 4552d17..4b92fee 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -108,7 +108,6 @@ bool BitState::ShouldVisit(int id, const char* p) {
// Grow the stack.
bool BitState::GrowStack() {
- // VLOG(0) << "Reallocate.";
maxjob_ *= 2;
Job* newjob = new Job[maxjob_];
memmove(newjob, job_, njob_*sizeof job_[0]);
@@ -179,8 +178,6 @@ bool BitState::TrySearch(int id0, const char* p0) {
}
// Visit ip, p.
- // VLOG(0) << "Job: " << ip->id() << " "
- // << (p - text_.begin()) << " " << arg;
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
@@ -274,7 +271,6 @@ bool BitState::TrySearch(int id0, const char* p0) {
if (endmatch_ && p != text_.end())
goto Next;
- // VLOG(0) << "Found match.";
// We found a match. If the caller doesn't care
// where the match is, no point going further.
if (nsubmatch_ == 0)
@@ -334,7 +330,6 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits;
visited_ = new uint32_t[nvisited_];
memset(visited_, 0, nvisited_*sizeof visited_[0]);
- // VLOG(0) << "nvisited_ = " << nvisited_;
ncap_ = 2*nsubmatch;
if (ncap_ < 2)
diff --git a/re2/compile.cc b/re2/compile.cc
index a99c909..472bbfa 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -1266,11 +1266,11 @@ Prog* Compiler::CompileSet(const RE2::Options& options, RE2::Anchor anchor,
// Make sure DFA has enough memory to operate,
// since we're not going to fall back to the NFA.
- bool failed;
+ bool dfa_failed = false;
StringPiece sp = "hello, world";
prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch,
- NULL, &failed, NULL);
- if (failed) {
+ NULL, &dfa_failed, NULL);
+ if (dfa_failed) {
delete prog;
return NULL;
}
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 7468534..1fc8a5a 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -29,13 +29,13 @@
#include <algorithm>
#include <atomic>
#include <map>
+#include <mutex>
#include <new>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
-#include "util/flags.h"
#include "util/logging.h"
#include "util/mix.h"
#include "util/mutex.h"
@@ -44,10 +44,6 @@
#include "re2/prog.h"
#include "re2/stringpiece.h"
-DEFINE_bool(re2_dfa_bail_when_slow, true,
- "Whether the RE2 DFA should bail out early "
- "if the NFA would be faster (for testing).");
-
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
#ifdef _MSC_VER
#pragma warning(disable: 4200)
@@ -66,9 +62,12 @@ static void* memrchr(const void* s, int c, size_t n) {
}
#endif
+// Controls whether the DFA should bail out early if the NFA would be faster.
+static bool dfa_should_bail_when_slow = true;
+
// Changing this to true compiles in prints that trace execution of the DFA.
// Generates a lot of output -- only useful for debugging.
-static const bool DebugDFA = false;
+static const bool ExtraDebug = false;
// A DFA implementation of a regular expression program.
// Since this is entirely a forward declaration mandated by C++,
@@ -354,7 +353,6 @@ class DFA {
int64_t state_budget_; // Amount of memory remaining for new States.
StateSet state_cache_; // All States computed so far.
StartInfo start_[kMaxStart];
- bool cache_warned_; // have printed to LOG(INFO) about the cache
};
// Shorthand for casting to uint8_t*.
@@ -432,9 +430,8 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
q0_(NULL),
q1_(NULL),
astack_(NULL),
- mem_budget_(max_mem),
- cache_warned_(false) {
- if (DebugDFA)
+ mem_budget_(max_mem) {
+ if (ExtraDebug)
fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
int nmark = 0;
if (kind_ == Prog::kLongestMatch)
@@ -451,8 +448,6 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
(sizeof(int)+sizeof(int)) * 2; // q0, q1
mem_budget_ -= nastack_ * sizeof(int); // astack
if (mem_budget_ < 0) {
- LOG(INFO) << "DFA out of memory: prog size " << prog_->size()
- << " mem " << max_mem;
init_failed_ = true;
return;
}
@@ -469,8 +464,6 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
(prog_->list_count()+nmark)*sizeof(int);
if (state_budget_ < 20*one_state) {
- LOG(INFO) << "DFA out of memory: prog size " << prog_->size()
- << " mem " << max_mem;
init_failed_ = true;
return;
}
@@ -611,7 +604,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) {
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
bool sawmark = false; // whether queue contains a Mark
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
int id = *it;
@@ -637,7 +630,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) {
(kind_ != Prog::kLongestMatch || !sawmark) &&
(flag & kFlagMatch)) {
delete[] inst;
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, " -> FullMatchState\n");
return FullMatchState;
}
@@ -684,7 +677,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint32_t flag) {
// if the state is *not* a matching state.
if (n == 0 && flag == 0) {
delete[] inst;
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, " -> DeadState\n");
return DeadState;
}
@@ -729,7 +722,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
state.flag_ = flag;
StateSet::iterator it = state_cache_.find(&state);
if (it != state_cache_.end()) {
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
return *it;
}
@@ -760,7 +753,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
s->ninst_ = ninst;
s->flag_ = flag;
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, " -> %s\n", DumpState(s).c_str());
// Put state in cache and return it.
@@ -952,7 +945,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
}
}
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
c, flag, DumpWorkq(newq).c_str(), *ismatch);
}
@@ -1098,11 +1091,6 @@ class DFA::RWLocker {
// Notice that the lock is *released* temporarily.
void LockForWriting();
- // Returns whether the lock is already held for writing.
- bool IsLockedForWriting() {
- return writing_;
- }
-
private:
Mutex* mu_;
bool writing_;
@@ -1111,9 +1099,7 @@ class DFA::RWLocker {
RWLocker& operator=(const RWLocker&) = delete;
};
-DFA::RWLocker::RWLocker(Mutex* mu)
- : mu_(mu), writing_(false) {
-
+DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
mu_->ReaderLock();
}
@@ -1128,10 +1114,10 @@ void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
}
DFA::RWLocker::~RWLocker() {
- if (writing_)
- mu_->WriterUnlock();
- else
+ if (!writing_)
mu_->ReaderUnlock();
+ else
+ mu_->WriterUnlock();
}
@@ -1148,20 +1134,8 @@ DFA::RWLocker::~RWLocker() {
void DFA::ResetCache(RWLocker* cache_lock) {
// Re-acquire the cache_mutex_ for writing (exclusive use).
- bool was_writing = cache_lock->IsLockedForWriting();
cache_lock->LockForWriting();
- // If we already held cache_mutex_ for writing, it means
- // this invocation of Search() has already reset the
- // cache once already. That's a pretty clear indication
- // that the cache is too small. Warn about that, once.
- // TODO(rsc): Only warn if state_cache_.size() < some threshold.
- if (was_writing && !cache_warned_) {
- LOG(INFO) << "DFA memory cache could be too small: "
- << "only room for " << state_cache_.size() << " states.";
- cache_warned_ = true;
- }
-
// Clear the cache, reset the memory budget.
for (int i = 0; i < kMaxStart; i++) {
start_[i].start = NULL;
@@ -1343,7 +1317,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
}
while (p != ep) {
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "@%td: %s\n",
p - bp, DumpState(s).c_str());
if (have_firstbyte && s == start) {
@@ -1401,7 +1375,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
// same at about 2 MB/s. Unless we're processing an average
// of 10 bytes per state computation, fail so that RE2 can
// fall back to the NFA.
- if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL &&
+ if (dfa_should_bail_when_slow && resetp != NULL &&
static_cast<size_t>(p - resetp) < 10*state_cache_.size()) {
params->failed = true;
return false;
@@ -1449,7 +1423,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
lastmatch = p - 1;
else
lastmatch = p + 1;
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "match @%td! [%s]\n",
lastmatch - bp, DumpState(s).c_str());
@@ -1494,7 +1468,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
}
}
s = ns;
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "@_: %s\n", DumpState(s).c_str());
if (s == FullMatchState) {
params->ep = reinterpret_cast<const char*>(ep);
@@ -1517,7 +1491,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
}
}
}
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "match @%td! [%s]\n",
lastmatch - bp, DumpState(s).c_str());
}
@@ -1615,7 +1589,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
// Sanity check: make sure that text lies within context.
if (text.begin() < context.begin() || text.end() > context.end()) {
- LOG(DFATAL) << "Text is not inside context.";
+ LOG(DFATAL) << "context does not contain text";
params->start = DeadState;
return true;
}
@@ -1668,7 +1642,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
}
}
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
params->anchored, params->run_forward, flags,
DumpState(info->start).c_str(), info->firstbyte.load());
@@ -1755,7 +1729,7 @@ bool DFA::Search(const StringPiece& text,
}
*failed = false;
- if (DebugDFA) {
+ if (ExtraDebug) {
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
text.ToString().c_str(), anchored, want_earliest_match,
@@ -1782,7 +1756,7 @@ bool DFA::Search(const StringPiece& text,
*epp = text.end();
return true;
}
- if (DebugDFA)
+ if (ExtraDebug)
fprintf(stderr, "start %s\n", DumpState(params.start).c_str());
bool ret = FastSearchLoop(&params);
if (params.failed) {
@@ -1794,24 +1768,6 @@ bool DFA::Search(const StringPiece& text,
}
DFA* Prog::GetDFA(MatchKind kind) {
- std::atomic<DFA*>* pdfa;
- if (kind == kFirstMatch || kind == kManyMatch) {
- pdfa = &dfa_first_;
- } else {
- kind = kLongestMatch;
- pdfa = &dfa_longest_;
- }
-
- // Quick check.
- DFA* dfa = pdfa->load(std::memory_order_acquire);
- if (dfa != NULL)
- return dfa;
-
- MutexLock l(&dfa_mutex_);
- dfa = pdfa->load(std::memory_order_relaxed);
- if (dfa != NULL)
- return dfa;
-
// For a forward DFA, half the memory goes to each DFA.
// However, if it is a "many match" DFA, then there is
// no counterpart with which the memory must be shared.
@@ -1819,25 +1775,29 @@ DFA* Prog::GetDFA(MatchKind kind) {
// For a reverse DFA, all the memory goes to the
// "longest match" DFA, because RE2 never does reverse
// "first match" searches.
- int64_t m = dfa_mem_;
- if (reversed_) {
- DCHECK_EQ(kind, kLongestMatch);
- } else if (kind == kFirstMatch || kind == kLongestMatch) {
- m /= 2;
+ if (kind == kFirstMatch) {
+ std::call_once(dfa_first_once_, [](Prog* prog) {
+ prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2);
+ }, this);
+ return dfa_first_;
+ } else if (kind == kManyMatch) {
+ std::call_once(dfa_first_once_, [](Prog* prog) {
+ prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_);
+ }, this);
+ return dfa_first_;
} else {
- DCHECK_EQ(kind, kManyMatch);
+ std::call_once(dfa_longest_once_, [](Prog* prog) {
+ if (!prog->reversed_)
+ prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2);
+ else
+ prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_);
+ }, this);
+ return dfa_longest_;
}
- dfa = new DFA(this, kind, m);
-
- // Synchronize with "quick check" above.
- pdfa->store(dfa, std::memory_order_release);
- return dfa;
}
-void Prog::DeleteDFA(std::atomic<DFA*>* pdfa) {
- DFA* dfa = pdfa->load(std::memory_order_relaxed);
- if (dfa != NULL)
- delete dfa;
+void Prog::DeleteDFA(DFA* dfa) {
+ delete dfa;
}
// Executes the regexp program to search in text,
@@ -1955,6 +1915,10 @@ int Prog::BuildEntireDFA(MatchKind kind) {
return GetDFA(kind)->BuildAllStates();
}
+void Prog::TEST_dfa_should_bail_when_slow(bool b) {
+ dfa_should_bail_when_slow = b;
+}
+
// Computes min and max for matching string.
// Won't return strings bigger than maxlen.
bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
@@ -2020,11 +1984,8 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
min->clear();
MutexLock lock(&mutex_);
for (int i = 0; i < maxlen; i++) {
- if (previously_visited_states[s] > kMaxEltRepetitions) {
- VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
- << " for state s=" << s << " and min=" << CEscape(*min);
+ if (previously_visited_states[s] > kMaxEltRepetitions)
break;
- }
previously_visited_states[s]++;
// Stop if min is a match.
@@ -2057,11 +2018,8 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
s = params.start;
max->clear();
for (int i = 0; i < maxlen; i++) {
- if (previously_visited_states[s] > kMaxEltRepetitions) {
- VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
- << " for state s=" << s << " and max=" << CEscape(*max);
+ if (previously_visited_states[s] > kMaxEltRepetitions)
break;
- }
previously_visited_states[s] += 1;
// Try to extend the string with high bytes.
diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
index 8caf992..12f638a 100644
--- a/re2/filtered_re2.cc
+++ b/re2/filtered_re2.cc
@@ -19,6 +19,11 @@ FilteredRE2::FilteredRE2()
prefilter_tree_(new PrefilterTree()) {
}
+FilteredRE2::FilteredRE2(int min_atom_len)
+ : compiled_(false),
+ prefilter_tree_(new PrefilterTree(min_atom_len)) {
+}
+
FilteredRE2::~FilteredRE2() {
for (size_t i = 0; i < re2_vec_.size(); i++)
delete re2_vec_[i];
@@ -45,8 +50,13 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
}
void FilteredRE2::Compile(std::vector<string>* atoms) {
- if (compiled_ || re2_vec_.size() == 0) {
- LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size();
+ if (compiled_) {
+ LOG(ERROR) << "Compile called already.";
+ return;
+ }
+
+ if (re2_vec_.empty()) {
+ LOG(ERROR) << "Compile called before Add.";
return;
}
@@ -69,7 +79,7 @@ int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
int FilteredRE2::FirstMatch(const StringPiece& text,
const std::vector<int>& atoms) const {
if (!compiled_) {
- LOG(DFATAL) << "FirstMatch called before Compile";
+ LOG(DFATAL) << "FirstMatch called before Compile.";
return -1;
}
std::vector<int> regexps;
diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h
index 48a6fd8..b1317cc 100644
--- a/re2/filtered_re2.h
+++ b/re2/filtered_re2.h
@@ -33,6 +33,7 @@ class PrefilterTree;
class FilteredRE2 {
public:
FilteredRE2();
+ explicit FilteredRE2(int min_atom_len);
~FilteredRE2();
// Uses RE2 constructor to create a RE2 object (re). Returns
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 849e79d..6bf771f 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -8,9 +8,7 @@
#include <string>
#include "re2/re2.h"
-#include "util/logging.h"
-using re2::FLAGS_minloglevel;
using re2::StringPiece;
using std::string;
@@ -54,9 +52,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size == 0 || size > 1000000)
return 0;
- // Suppress logging below FATAL severity.
- FLAGS_minloglevel = 3;
-
// The one-at-a-time hash by Bob Jenkins.
uint32_t hash = 0;
for (size_t i = 0; i < size; i++) {
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 0693cd8..31481da 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -40,6 +40,8 @@
namespace re2 {
+static const bool ExtraDebug = false;
+
class NFA {
public:
NFA(Prog* prog);
@@ -60,8 +62,6 @@ class NFA {
bool anchored, bool longest,
StringPiece* submatch, int nsubmatch);
- static const int Debug = 0;
-
private:
struct Thread {
union {
@@ -238,7 +238,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,
if (id == 0)
continue;
if (q->has_index(id)) {
- if (Debug)
+ if (ExtraDebug)
fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str());
continue;
}
@@ -304,7 +304,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,
// Save state; will pick up at next byte.
t = Incref(t0);
*tp = t;
- if (Debug)
+ if (ExtraDebug)
fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
Next:
@@ -435,12 +435,6 @@ string NFA::FormatCapture(const char** capture) {
return s;
}
-// Returns whether haystack contains needle's memory.
-static bool StringPieceContains(const StringPiece haystack, const StringPiece needle) {
- return haystack.begin() <= needle.begin() &&
- haystack.end() >= needle.end();
-}
-
bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
bool anchored, bool longest,
StringPiece* submatch, int nsubmatch) {
@@ -451,12 +445,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
if (context.begin() == NULL)
context = text;
- if (!StringPieceContains(context, text)) {
- LOG(FATAL) << "Bad args: context does not contain text "
- << reinterpret_cast<const void*>(context.begin())
- << "+" << context.size() << " "
- << reinterpret_cast<const void*>(text.begin())
- << "+" << text.size();
+ // Sanity check: make sure that text lies within context.
+ if (text.begin() < context.begin() || text.end() > context.end()) {
+ LOG(DFATAL) << "context does not contain text";
return false;
}
@@ -493,11 +484,10 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// For debugging prints.
btext_ = context.begin();
- if (Debug) {
+ if (ExtraDebug)
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
text.ToString().c_str(), context.ToString().c_str(), anchored,
longest);
- }
// Set up search.
Threadq* runq = &q0_;
@@ -537,7 +527,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
else
flag |= kEmptyNonWordBoundary;
- if (Debug) {
+ if (ExtraDebug) {
int c = 0;
if (p == context.begin())
c = '^';
@@ -629,7 +619,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// If all the threads have died, stop early.
if (runq->size() == 0) {
- if (Debug)
+ if (ExtraDebug)
fprintf(stderr, "dead\n");
break;
}
@@ -645,12 +635,11 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
submatch[i] =
StringPiece(match_[2 * i],
static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
- if (Debug)
+ if (ExtraDebug)
fprintf(stderr, "match (%td,%td)\n",
match_[0] - btext_, match_[1] - btext_);
return true;
}
- VLOG(1) << "No matches found";
return false;
}
@@ -719,7 +708,7 @@ bool
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind,
StringPiece* match, int nmatch) {
- if (NFA::Debug)
+ if (ExtraDebug)
Dump();
NFA nfa(this);
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 65eb937..10dc642 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -72,7 +72,7 @@
namespace re2 {
-static const int Debug = 0;
+static const bool ExtraDebug = false;
// The key insight behind this implementation is that the
// non-determinism in an NFA for a one-pass regular expression
@@ -460,7 +460,7 @@ bool Prog::IsOnePass() {
int nextindex = nodebyid[ip->out()];
if (nextindex == -1) {
if (nalloc >= maxnodes) {
- if (Debug)
+ if (ExtraDebug)
LOG(ERROR) << StringPrintf(
"Not OnePass: hit node limit %d >= %d", nalloc, maxnodes);
goto fail;
@@ -485,10 +485,9 @@ bool Prog::IsOnePass() {
if ((act & kImpossible) == kImpossible) {
node->action[b] = newact;
} else if (act != newact) {
- if (Debug) {
+ if (ExtraDebug)
LOG(ERROR) << StringPrintf(
"Not OnePass: conflict on byte %#x at state %d", c, *it);
- }
goto fail;
}
}
@@ -507,10 +506,9 @@ bool Prog::IsOnePass() {
if ((act & kImpossible) == kImpossible) {
node->action[b] = newact;
} else if (act != newact) {
- if (Debug) {
+ if (ExtraDebug)
LOG(ERROR) << StringPrintf(
"Not OnePass: conflict on byte %#x at state %d", c, *it);
- }
goto fail;
}
}
@@ -549,10 +547,9 @@ bool Prog::IsOnePass() {
// If already on work queue, (1) is violated: bail out.
if (!AddQ(&workq, ip->out())) {
- if (Debug) {
+ if (ExtraDebug)
LOG(ERROR) << StringPrintf(
"Not OnePass: multiple paths %d -> %d\n", *it, ip->out());
- }
goto fail;
}
id = ip->out();
@@ -561,10 +558,9 @@ bool Prog::IsOnePass() {
case kInstMatch:
if (matched) {
// (3) is violated
- if (Debug) {
+ if (ExtraDebug)
LOG(ERROR) << StringPrintf(
"Not OnePass: multiple matches from %d\n", *it);
- }
goto fail;
}
matched = true;
@@ -584,7 +580,7 @@ bool Prog::IsOnePass() {
}
}
- if (Debug) { // For debugging, dump one-pass NFA to LOG(ERROR).
+ if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR).
LOG(ERROR) << "bytemap:\n" << DumpByteMap();
LOG(ERROR) << "prog:\n" << Dump();
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index eaf4a27..dbbbc97 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -19,7 +19,7 @@
namespace re2 {
-static const int Trace = false;
+static const bool ExtraDebug = false;
typedef std::set<string>::iterator SSIter;
typedef std::set<string>::const_iterator ConstSSIter;
@@ -30,13 +30,10 @@ Prefilter::Prefilter(Op op) {
subs_ = NULL;
if (op_ == AND || op_ == OR)
subs_ = new std::vector<Prefilter*>;
-
- VLOG(10) << "constructed: " << reinterpret_cast<intptr_t>(this);
}
// Destroys a Prefilter.
Prefilter::~Prefilter() {
- VLOG(10) << "destructing: " << reinterpret_cast<intptr_t>(this);
if (subs_) {
for (size_t i = 0; i < subs_->size(); i++)
delete (*subs_)[i];
@@ -456,10 +453,10 @@ Prefilter::Info* Prefilter::Info::EmptyString() {
typedef CharClass::iterator CCIter;
Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
bool latin1) {
- if (Trace) {
- VLOG(0) << "CharClassInfo:";
+ if (ExtraDebug) {
+ LOG(ERROR) << "CharClassInfo:";
for (CCIter i = cc->begin(); i != cc->end(); ++i)
- VLOG(0) << " " << i->lo << "-" << i->hi;
+ LOG(ERROR) << " " << i->lo << "-" << i->hi;
}
// If the class is too large, it's okay to overestimate.
@@ -479,9 +476,8 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
a->is_exact_ = true;
- if (Trace) {
- VLOG(0) << " = " << a->ToString();
- }
+ if (ExtraDebug)
+ LOG(ERROR) << " = " << a->ToString();
return a;
}
@@ -508,9 +504,8 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
};
Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
- if (Trace) {
- LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
- }
+ if (ExtraDebug)
+ LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
Prefilter::Info::Walker w(latin1);
@@ -612,7 +607,6 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
info = child_args[0];
for (int i = 1; i < nchild_args; i++)
info = Alt(info, child_args[i]);
- VLOG(10) << "Alt: " << info->ToString();
break;
case kRegexpStar:
@@ -642,10 +636,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
}
- if (Trace) {
- VLOG(0) << "BuildInfo " << re->ToString()
- << ": " << (info ? info->ToString() : "");
- }
+ if (ExtraDebug)
+ LOG(ERROR) << "BuildInfo " << re->ToString()
+ << ": " << (info ? info->ToString() : "");
return info;
}
diff --git a/re2/prefilter.h b/re2/prefilter.h
index f400e4f..ead09e1 100644
--- a/re2/prefilter.h
+++ b/re2/prefilter.h
@@ -43,7 +43,7 @@ class Prefilter {
// The children of the Prefilter node.
std::vector<Prefilter*>* subs() {
- CHECK(op_ == AND || op_ == OR);
+ DCHECK(op_ == AND || op_ == OR);
return subs_;
}
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
index 4238674..885cfb6 100644
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -14,19 +14,22 @@
#include <vector>
#include "util/util.h"
-#include "util/flags.h"
#include "util/logging.h"
#include "re2/prefilter.h"
#include "re2/re2.h"
-DEFINE_int32(filtered_re2_min_atom_len,
- 3,
- "Strings less than this length are not stored as atoms");
-
namespace re2 {
+static const bool ExtraDebug = false;
+
PrefilterTree::PrefilterTree()
- : compiled_(false) {
+ : compiled_(false),
+ min_atom_len_(3) {
+}
+
+PrefilterTree::PrefilterTree(int min_atom_len)
+ : compiled_(false),
+ min_atom_len_(min_atom_len) {
}
PrefilterTree::~PrefilterTree() {
@@ -37,62 +40,22 @@ PrefilterTree::~PrefilterTree() {
delete entries_[i].parents;
}
-// Functions used for adding and Compiling prefilters to the
-// PrefilterTree.
-static bool KeepPart(Prefilter* prefilter, int level) {
- if (prefilter == NULL)
- return false;
-
- switch (prefilter->op()) {
- default:
- LOG(DFATAL) << "Unexpected op in KeepPart: "
- << prefilter->op();
- return false;
-
- case Prefilter::ALL:
- return false;
-
- case Prefilter::ATOM:
- return prefilter->atom().size() >=
- static_cast<size_t>(FLAGS_filtered_re2_min_atom_len);
-
- case Prefilter::AND: {
- int j = 0;
- std::vector<Prefilter*>* subs = prefilter->subs();
- for (size_t i = 0; i < subs->size(); i++)
- if (KeepPart((*subs)[i], level + 1))
- (*subs)[j++] = (*subs)[i];
- else
- delete (*subs)[i];
-
- subs->resize(j);
- return j > 0;
- }
-
- case Prefilter::OR:
- for (size_t i = 0; i < prefilter->subs()->size(); i++)
- if (!KeepPart((*prefilter->subs())[i], level + 1))
- return false;
- return true;
- }
-}
-
-void PrefilterTree::Add(Prefilter *f) {
+void PrefilterTree::Add(Prefilter* prefilter) {
if (compiled_) {
- LOG(DFATAL) << "Add after Compile.";
+ LOG(DFATAL) << "Add called after Compile.";
return;
}
- if (f != NULL && !KeepPart(f, 0)) {
- delete f;
- f = NULL;
+ if (prefilter != NULL && !KeepNode(prefilter)) {
+ delete prefilter;
+ prefilter = NULL;
}
- prefilter_vec_.push_back(f);
+ prefilter_vec_.push_back(prefilter);
}
void PrefilterTree::Compile(std::vector<string>* atom_vec) {
if (compiled_) {
- LOG(DFATAL) << "Compile after Compile.";
+ LOG(DFATAL) << "Compile called already.";
return;
}
@@ -136,7 +99,8 @@ void PrefilterTree::Compile(std::vector<string>* atom_vec) {
}
}
- PrintDebugInfo();
+ if (ExtraDebug)
+ PrintDebugInfo();
}
Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) {
@@ -162,6 +126,42 @@ string PrefilterTree::NodeString(Prefilter* node) const {
return s;
}
+bool PrefilterTree::KeepNode(Prefilter* node) const {
+ if (node == NULL)
+ return false;
+
+ switch (node->op()) {
+ default:
+ LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
+ return false;
+
+ case Prefilter::ALL:
+ return false;
+
+ case Prefilter::ATOM:
+ return node->atom().size() >= static_cast<size_t>(min_atom_len_);
+
+ case Prefilter::AND: {
+ int j = 0;
+ std::vector<Prefilter*>* subs = node->subs();
+ for (size_t i = 0; i < subs->size(); i++)
+ if (KeepNode((*subs)[i]))
+ (*subs)[j++] = (*subs)[i];
+ else
+ delete (*subs)[i];
+
+ subs->resize(j);
+ return j > 0;
+ }
+
+ case Prefilter::OR:
+ for (size_t i = 0; i < node->subs()->size(); i++)
+ if (!KeepNode((*node->subs())[i]))
+ return false;
+ return true;
+ }
+}
+
void PrefilterTree::AssignUniqueIds(std::vector<string>* atom_vec) {
atom_vec->clear();
@@ -294,7 +294,7 @@ void PrefilterTree::RegexpsGivenStrings(
std::vector<int>* regexps) const {
regexps->clear();
if (!compiled_) {
- LOG(WARNING) << "Compile() not called";
+ LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
for (size_t i = 0; i < prefilter_vec_.size(); ++i)
regexps->push_back(static_cast<int>(i));
} else {
@@ -303,7 +303,6 @@ void PrefilterTree::RegexpsGivenStrings(
std::vector<int> matched_atom_ids;
for (size_t j = 0; j < matched_atoms.size(); j++) {
matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
- VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]];
}
PropagateMatch(matched_atom_ids, &regexps_map);
for (IntMap::iterator it = regexps_map.begin();
@@ -325,12 +324,9 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
work.set(atom_ids[i], 1);
for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
const Entry& entry = entries_[it->index()];
- VLOG(10) << "Processing: " << it->index();
// Record regexps triggered.
- for (size_t i = 0; i < entry.regexps.size(); i++) {
- VLOG(10) << "Regexp triggered: " << entry.regexps[i];
+ for (size_t i = 0; i < entry.regexps.size(); i++)
regexps->set(entry.regexps[i], 1);
- }
int c;
// Pass trigger up to parents.
for (StdIntMap::iterator it = entry.parents->begin();
@@ -338,7 +334,6 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
++it) {
int j = it->first;
const Entry& parent = entries_[j];
- VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
// Delay until all the children have succeeded.
if (parent.propagate_up_at_count > 1) {
if (count.has_index(j)) {
@@ -351,7 +346,6 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
if (c < parent.propagate_up_at_count)
continue;
}
- VLOG(10) << "Triggering: " << j;
// Trigger the parent.
work.set(j, 1);
}
@@ -360,26 +354,26 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
// Debugging help.
void PrefilterTree::PrintPrefilter(int regexpid) {
- LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]);
+ LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
}
void PrefilterTree::PrintDebugInfo() {
- VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size();
- VLOG(10) << "#Unique Nodes: " << entries_.size();
+ LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
+ LOG(ERROR) << "#Unique Nodes: " << entries_.size();
for (size_t i = 0; i < entries_.size(); ++i) {
StdIntMap* parents = entries_[i].parents;
const std::vector<int>& regexps = entries_[i].regexps;
- VLOG(10) << "EntryId: " << i
- << " N: " << parents->size() << " R: " << regexps.size();
+ LOG(ERROR) << "EntryId: " << i
+ << " N: " << parents->size() << " R: " << regexps.size();
for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
- VLOG(10) << it->first;
+ LOG(ERROR) << it->first;
}
- VLOG(10) << "Map:";
+ LOG(ERROR) << "Map:";
for (std::map<string, Prefilter*>::const_iterator iter = node_map_.begin();
iter != node_map_.end(); ++iter)
- VLOG(10) << "NodeId: " << (*iter).second->unique_id()
- << " Str: " << (*iter).first;
+ LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
+ << " Str: " << (*iter).first;
}
string PrefilterTree::DebugNodeString(Prefilter* node) const {
diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
index 276d934..4c60801 100644
--- a/re2/prefilter_tree.h
+++ b/re2/prefilter_tree.h
@@ -33,6 +33,7 @@ class Prefilter;
class PrefilterTree {
public:
PrefilterTree();
+ explicit PrefilterTree(int min_atom_len);
~PrefilterTree();
// Adds the prefilter for the next regexp. Note that we assume that
@@ -84,6 +85,9 @@ class PrefilterTree {
};
private:
+ // Returns true if the prefilter node should be kept.
+ bool KeepNode(Prefilter* node) const;
+
// This function assigns unique ids to various parts of the
// prefilter, by looking at if these nodes are already in the
// PrefilterTree.
@@ -127,6 +131,9 @@ class PrefilterTree {
// Has the prefilter tree been compiled.
bool compiled_;
+ // Strings less than this length are not stored as atoms.
+ const int min_atom_len_;
+
PrefilterTree(const PrefilterTree&) = delete;
PrefilterTree& operator=(const PrefilterTree&) = delete;
};
diff --git a/re2/prog.cc b/re2/prog.cc
index bd83422..714bff8 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -114,14 +114,14 @@ Prog::Prog()
list_count_(0),
inst_(NULL),
onepass_nodes_(NULL),
+ dfa_mem_(0),
dfa_first_(NULL),
- dfa_longest_(NULL),
- dfa_mem_(0) {
+ dfa_longest_(NULL) {
}
Prog::~Prog() {
- DeleteDFA(&dfa_first_);
- DeleteDFA(&dfa_longest_);
+ DeleteDFA(dfa_longest_);
+ DeleteDFA(dfa_first_);
delete[] onepass_nodes_;
delete[] inst_;
}
@@ -190,9 +190,9 @@ string Prog::DumpByteMap() {
}
int Prog::first_byte() {
- std::call_once(first_byte_once_, [this]() {
- first_byte_ = ComputeFirstByte();
- });
+ std::call_once(first_byte_once_, [](Prog* prog) {
+ prog->first_byte_ = prog->ComputeFirstByte();
+ }, this);
return first_byte_;
}
@@ -513,11 +513,11 @@ void Prog::ComputeByteMap() {
builder.Build(bytemap_, &bytemap_range_);
- if (0) { // For debugging: use trivial bytemap.
+ if (0) { // For debugging, use trivial bytemap.
+ LOG(ERROR) << "Using trivial bytemap.";
for (int i = 0; i < 256; i++)
bytemap_[i] = static_cast<uint8_t>(i);
bytemap_range_ = 256;
- LOG(INFO) << "Using trivial bytemap.";
}
}
diff --git a/re2/prog.h b/re2/prog.h
index 339ef20..57a771c 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -10,14 +10,12 @@
// expression symbolically.
#include <stdint.h>
-#include <atomic>
#include <mutex>
#include <string>
#include <vector>
#include "util/util.h"
#include "util/logging.h"
-#include "util/mutex.h"
#include "util/sparse_array.h"
#include "util/sparse_set.h"
#include "re2/re2.h"
@@ -264,6 +262,10 @@ class Prog {
// for testing purposes. Returns number of states.
int BuildEntireDFA(MatchKind kind);
+ // Controls whether the DFA should bail out early if the NFA would be faster.
+ // FOR TESTING ONLY.
+ static void TEST_dfa_should_bail_when_slow(bool b);
+
// Compute bytemap.
void ComputeByteMap();
@@ -345,7 +347,7 @@ class Prog {
friend class Compiler;
DFA* GetDFA(MatchKind kind);
- void DeleteDFA(std::atomic<DFA*>* pdfa);
+ void DeleteDFA(DFA* dfa);
bool anchor_start_; // regexp has explicit start anchor
bool anchor_end_; // regexp has explicit end anchor
@@ -366,14 +368,15 @@ class Prog {
Inst* inst_; // pointer to instruction array
uint8_t* onepass_nodes_; // data for OnePass nodes
- Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_
- std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch
- std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
- int64_t dfa_mem_; // Maximum memory for DFAs.
+ int64_t dfa_mem_; // Maximum memory for DFAs.
+ DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
+ DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
uint8_t bytemap_[256]; // map from input bytes to byte classes
std::once_flag first_byte_once_;
+ std::once_flag dfa_first_once_;
+ std::once_flag dfa_longest_once_;
Prog(const Prog&) = delete;
Prog& operator=(const Prog&) = delete;
diff --git a/re2/re2.cc b/re2/re2.cc
index c46caa5..132257b 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -23,7 +23,6 @@
#include <vector>
#include "util/util.h"
-#include "util/flags.h"
#include "util/logging.h"
#include "util/sparse_array.h"
#include "util/strutil.h"
@@ -31,17 +30,12 @@
#include "re2/prog.h"
#include "re2/regexp.h"
-DEFINE_bool(trace_re2, false, "trace RE2 execution");
-
namespace re2 {
// Maximum number of args we can set
static const int kMaxArgs = 16;
static const int kVecSize = 1+kMaxArgs;
-#ifdef _MSC_VER
-__declspec(selectany)
-#endif
const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
RE2::Options::Options(RE2::CannedOptions opt)
@@ -236,15 +230,16 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
// Returns rprog_, computing it if needed.
re2::Prog* RE2::ReverseProg() const {
- std::call_once(rprog_once_, [this]() {
- rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
- if (rprog_ == NULL) {
- if (options_.log_errors())
- LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
- error_ = new string("pattern too large - reverse compile failed");
- error_code_ = RE2::ErrorPatternTooLarge;
+ std::call_once(rprog_once_, [](const RE2* re) {
+ re->rprog_ =
+ re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
+ if (re->rprog_ == NULL) {
+ if (re->options_.log_errors())
+ LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
+ re->error_ = new string("pattern too large - reverse compile failed");
+ re->error_code_ = RE2::ErrorPatternTooLarge;
}
- });
+ }, this);
return rprog_;
}
@@ -289,32 +284,32 @@ int RE2::ProgramFanout(std::map<int, int>* histogram) const {
// Returns num_captures_, computing it if needed, or -1 if the
// regexp wasn't valid on construction.
int RE2::NumberOfCapturingGroups() const {
- std::call_once(num_captures_once_, [this]() {
- if (suffix_regexp_ != NULL)
- num_captures_ = suffix_regexp_->NumCaptures();
- });
+ std::call_once(num_captures_once_, [](const RE2* re) {
+ if (re->suffix_regexp_ != NULL)
+ re->num_captures_ = re->suffix_regexp_->NumCaptures();
+ }, this);
return num_captures_;
}
// Returns named_groups_, computing it if needed.
const std::map<string, int>& RE2::NamedCapturingGroups() const {
- std::call_once(named_groups_once_, [this]() {
- if (suffix_regexp_ != NULL)
- named_groups_ = suffix_regexp_->NamedCaptures();
- if (named_groups_ == NULL)
- named_groups_ = empty_named_groups;
- });
+ std::call_once(named_groups_once_, [](const RE2* re) {
+ if (re->suffix_regexp_ != NULL)
+ re->named_groups_ = re->suffix_regexp_->NamedCaptures();
+ if (re->named_groups_ == NULL)
+ re->named_groups_ = empty_named_groups;
+ }, this);
return *named_groups_;
}
// Returns group_names_, computing it if needed.
const std::map<int, string>& RE2::CapturingGroupNames() const {
- std::call_once(group_names_once_, [this]() {
- if (suffix_regexp_ != NULL)
- group_names_ = suffix_regexp_->CaptureNames();
- if (group_names_ == NULL)
- group_names_ = empty_group_names;
- });
+ std::call_once(group_names_once_, [](const RE2* re) {
+ if (re->suffix_regexp_ != NULL)
+ re->group_names_ = re->suffix_regexp_->CaptureNames();
+ if (re->group_names_ == NULL)
+ re->group_names_ = empty_group_names;
+ }, this);
return *group_names_;
}
@@ -668,24 +663,16 @@ bool RE2::Match(const StringPiece& text,
if (!prog_->SearchDFA(subtext, text, anchor, kind,
matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
+ if (options_.log_errors())
+ LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
+ << "bytemap range " << prog_->bytemap_range() << ", "
+ << "list count " << prog_->list_count();
// Fall back to NFA below.
skipped_test = true;
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " DFA failed.";
break;
}
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " used DFA - no match.";
return false;
}
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " used DFA - match";
if (matchp == NULL) // Matched. Don't care where
return true;
// SearchDFA set match[0].end() but didn't know where the
@@ -697,26 +684,18 @@ bool RE2::Match(const StringPiece& text,
if (!prog->SearchDFA(match, text, Prog::kAnchored,
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
+ if (options_.log_errors())
+ LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
+ << "bytemap range " << prog_->bytemap_range() << ", "
+ << "list count " << prog_->list_count();
// Fall back to NFA below.
skipped_test = true;
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " reverse DFA failed.";
break;
}
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " DFA inconsistency.";
if (options_.log_errors())
- LOG(ERROR) << "DFA inconsistency";
+ LOG(ERROR) << "SearchDFA inconsistency";
return false;
}
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " used reverse DFA.";
break;
}
@@ -735,35 +714,24 @@ bool RE2::Match(const StringPiece& text,
// the DFA does.
if (can_one_pass && text.size() <= 4096 &&
(ncap > 1 || text.size() <= 8)) {
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " skipping DFA for OnePass.";
skipped_test = true;
break;
}
if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " skipping DFA for BitState.";
skipped_test = true;
break;
}
if (!prog_->SearchDFA(subtext, text, anchor, kind,
&match, &dfa_failed, NULL)) {
if (dfa_failed) {
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " DFA failed.";
+ if (options_.log_errors())
+ LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
+ << "bytemap range " << prog_->bytemap_range() << ", "
+ << "list count " << prog_->list_count();
+ // Fall back to NFA below.
skipped_test = true;
break;
}
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " used DFA - no match.";
return false;
}
break;
@@ -789,20 +757,12 @@ bool RE2::Match(const StringPiece& text,
}
if (can_one_pass && anchor != Prog::kUnanchored) {
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " using OnePass.";
if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchOnePass inconsistency";
return false;
}
} else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " using BitState.";
if (!prog_->SearchBitState(subtext1, text, anchor,
kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
@@ -810,10 +770,6 @@ bool RE2::Match(const StringPiece& text,
return false;
}
} else {
- if (FLAGS_trace_re2)
- LOG(INFO) << "Match " << trunc(pattern_)
- << " [" << CEscape(subtext) << "]"
- << " using NFA.";
if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchNFA inconsistency";
@@ -880,7 +836,6 @@ bool RE2::DoMatch(const StringPiece& text,
int ncap = NumberOfCapturingGroups();
if (ncap < n) {
// RE has fewer capturing groups than number of arg pointers passed in
- VLOG(1) << "Asked for " << n << " but only have " << ncap;
delete[] heapvec;
return false;
}
@@ -890,8 +845,6 @@ bool RE2::DoMatch(const StringPiece& text,
const StringPiece& s = vec[i+1];
if (!args[i]->Parse(s.data(), s.size())) {
// TODO: Should we indicate what the error was?
- VLOG(1) << "Parse error on #" << i << " " << s << " "
- << (void*)s.data() << "/" << s.size();
delete[] heapvec;
return false;
}
diff --git a/re2/re2.h b/re2/re2.h
index 377a04d..c49de05 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -905,7 +905,7 @@ class LazyRE2 {
// Named accessor/initializer:
RE2* get() const {
- std::call_once(once_, [this]() { LazyRE2::Init(this); });
+ std::call_once(once_, &LazyRE2::Init, this);
return ptr_;
}
diff --git a/re2/regexp.h b/re2/regexp.h
index c11c2c1..e77d0d8 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -497,8 +497,7 @@ class Regexp {
// Allocate space for n sub-regexps.
void AllocSub(int n) {
- if (n < 0 || static_cast<uint16_t>(n) != n)
- LOG(FATAL) << "Cannot AllocSub " << n;
+ DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
if (n > 1)
submany_ = new Regexp*[n];
nsub_ = n;
diff --git a/re2/set.cc b/re2/set.cc
index db2d13d..1ee431c 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -101,12 +101,16 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
}
if (v != NULL)
v->clear();
- bool failed;
+ bool dfa_failed = false;
bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
- Prog::kManyMatch, NULL, &failed, v);
- if (failed)
- LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
-
+ Prog::kManyMatch, NULL, &dfa_failed, v);
+ if (dfa_failed) {
+ if (options_.log_errors())
+ LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
+ << "bytemap range " << prog_->bytemap_range() << ", "
+ << "list count " << prog_->list_count();
+ return false;
+ }
if (ret == false)
return false;
if (v != NULL && v->empty()) {
diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc
index 94c2bcb..d25351f 100644
--- a/re2/stringpiece.cc
+++ b/re2/stringpiece.cc
@@ -10,9 +10,6 @@
using re2::StringPiece;
-#ifdef _MSC_VER
-__declspec(selectany)
-#endif
const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h
StringPiece::size_type StringPiece::copy(char* buf, size_type n,
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 2051683..e56b3e0 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -18,8 +18,6 @@
static const bool UsingMallocCounter = false;
-DECLARE_bool(re2_dfa_bail_when_slow);
-
DEFINE_int32(size, 8, "log2(number of DFA nodes)");
DEFINE_int32(repeat, 2, "Repetition count.");
DEFINE_int32(threads, 4, "number of threads");
@@ -77,15 +75,10 @@ TEST(Multithreaded, BuildEntireDFA) {
// the DFA once the memory limits are reached.
TEST(SingleThreaded, BuildEntireDFA) {
// Create regexp with 2^30 states in DFA.
- string s = "a";
- for (int i = 0; i < 30; i++)
- s += "[ab]";
- s += "b";
-
- Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
+ Regexp* re = Regexp::Parse("a[ab]{30}b", Regexp::LikePerl, NULL);
CHECK(re);
- int max = 24;
- for (int i = 17; i < max; i++) {
+
+ for (int i = 17; i < 24; i++) {
int64_t limit = 1<<i;
int64_t usage;
//int64_t progusage, dfamem;
@@ -100,15 +93,15 @@ TEST(SingleThreaded, BuildEntireDFA) {
usage = m.HeapGrowth();
delete prog;
}
- if (!UsingMallocCounter)
- continue;
- //LOG(INFO) << "limit " << limit << ", "
- // << "prog usage " << progusage << ", "
- // << "DFA budget " << dfamem << ", "
- // << "total " << usage;
- // Tolerate +/- 10%.
- CHECK_GT(usage, limit*9/10);
- CHECK_LT(usage, limit*11/10);
+ if (UsingMallocCounter) {
+ //LOG(INFO) << "limit " << limit << ", "
+ // << "prog usage " << progusage << ", "
+ // << "DFA budget " << dfamem << ", "
+ // << "total " << usage;
+ // Tolerate +/- 10%.
+ CHECK_GT(usage, limit*9/10);
+ CHECK_LT(usage, limit*11/10);
+ }
}
re->Decref();
}
@@ -166,6 +159,14 @@ static string DeBruijnString(int n) {
// 2^n byte limit, it must be handling out-of-memory conditions
// gracefully.
TEST(SingleThreaded, SearchDFA) {
+ // The De Bruijn string is the worst case input for this regexp.
+ // By default, the DFA will notice that it is flushing its cache
+ // too frequently and will bail out early, so that RE2 can use the
+ // NFA implementation instead. (The DFA loses its speed advantage
+ // if it can't get a good cache hit rate.)
+ // Tell the DFA to trudge along instead.
+ Prog::TEST_dfa_should_bail_when_slow(false);
+
// Choice of n is mostly arbitrary, except that:
// * making n too big makes the test run for too long.
// * making n too small makes the DFA refuse to run,
@@ -182,14 +183,6 @@ TEST(SingleThreaded, SearchDFA) {
string no_match = DeBruijnString(n);
string match = no_match + "0";
- // The De Bruijn string is the worst case input for this regexp.
- // By default, the DFA will notice that it is flushing its cache
- // too frequently and will bail out early, so that RE2 can use the
- // NFA implementation instead. (The DFA loses its speed advantage
- // if it can't get a good cache hit rate.)
- // Tell the DFA to trudge along instead.
- FLAGS_re2_dfa_bail_when_slow = false;
-
int64_t usage;
int64_t peak_usage;
{
@@ -197,7 +190,8 @@ TEST(SingleThreaded, SearchDFA) {
Prog* prog = re->CompileToProg(1<<n);
CHECK(prog);
for (int i = 0; i < 10; i++) {
- bool matched, failed = false;
+ bool matched = false;
+ bool failed = false;
matched = prog->SearchDFA(match, NULL,
Prog::kUnanchored, Prog::kFirstMatch,
NULL, &failed, NULL);
@@ -213,13 +207,16 @@ TEST(SingleThreaded, SearchDFA) {
peak_usage = m.PeakHeapGrowth();
delete prog;
}
- if (!UsingMallocCounter)
- return;
- //LOG(INFO) << "usage " << usage << ", "
- // << "peak usage " << peak_usage;
- CHECK_LT(usage, 1<<n);
- CHECK_LT(peak_usage, 1<<n);
+ if (UsingMallocCounter) {
+ //LOG(INFO) << "usage " << usage << ", "
+ // << "peak usage " << peak_usage;
+ CHECK_LT(usage, 1<<n);
+ CHECK_LT(peak_usage, 1<<n);
+ }
re->Decref();
+
+ // Reset to original behaviour.
+ Prog::TEST_dfa_should_bail_when_slow(true);
}
// Helper function: searches for match, which should match,
@@ -243,6 +240,8 @@ static void DoSearch(Prog* prog, const StringPiece& match,
}
TEST(Multithreaded, SearchDFA) {
+ Prog::TEST_dfa_should_bail_when_slow(false);
+
// Same as single-threaded test above.
const int n = 18;
Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
@@ -250,7 +249,6 @@ TEST(Multithreaded, SearchDFA) {
CHECK(re);
string no_match = DeBruijnString(n);
string match = no_match + "0";
- FLAGS_re2_dfa_bail_when_slow = false;
// Check that single-threaded code works.
{
@@ -279,6 +277,9 @@ TEST(Multithreaded, SearchDFA) {
}
re->Decref();
+
+ // Reset to original behaviour.
+ Prog::TEST_dfa_should_bail_when_slow(true);
}
struct ReverseTest {
diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
index c3b2a3c..867eac6 100644
--- a/re2/testing/filtered_re2_test.cc
+++ b/re2/testing/filtered_re2_test.cc
@@ -13,11 +13,12 @@
#include "re2/filtered_re2.h"
#include "re2/re2.h"
-DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc
-
namespace re2 {
struct FilterTestVars {
+ FilterTestVars() {}
+ explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {}
+
std::vector<string> atoms;
std::vector<int> atom_indices;
std::vector<int> matches;
@@ -27,14 +28,20 @@ struct FilterTestVars {
TEST(FilteredRE2Test, EmptyTest) {
FilterTestVars v;
+
+ v.f.Compile(&v.atoms);
+ EXPECT_EQ(0, v.atoms.size());
+
+ // Compile has no effect at all when called before Add: it will not
+ // record that it has been called and it will not clear the vector.
+ // The second point does not matter here, but the first point means
+ // that an error will be logged during the call to AllMatches.
v.f.AllMatches("foo", v.atom_indices, &v.matches);
EXPECT_EQ(0, v.matches.size());
}
TEST(FilteredRE2Test, SmallOrTest) {
- FLAGS_filtered_re2_min_atom_len = 4;
-
- FilterTestVars v;
+ FilterTestVars v(4); // override the minimum atom length
int id;
v.f.Add("(foo|bar)", v.opts, &id);
@@ -47,7 +54,6 @@ TEST(FilteredRE2Test, SmallOrTest) {
}
TEST(FilteredRE2Test, SmallLatinTest) {
- FLAGS_filtered_re2_min_atom_len = 3;
FilterTestVars v;
int id;
@@ -163,21 +169,19 @@ bool CheckExpectedAtoms(const char* atoms[],
pass = pass && expected[i] == v->atoms[i];
if (!pass) {
- LOG(WARNING) << "Failed " << testname;
- LOG(WARNING) << "Expected #atoms = " << expected.size();
+ LOG(ERROR) << "Failed " << testname;
+ LOG(ERROR) << "Expected #atoms = " << expected.size();
for (size_t i = 0; i < expected.size(); i++)
- LOG(WARNING) << expected[i];
- LOG(WARNING) << "Found #atoms = " << v->atoms.size();
+ LOG(ERROR) << expected[i];
+ LOG(ERROR) << "Found #atoms = " << v->atoms.size();
for (size_t i = 0; i < v->atoms.size(); i++)
- LOG(WARNING) << v->atoms[i];
+ LOG(ERROR) << v->atoms[i];
}
return pass;
}
TEST(FilteredRE2Test, AtomTests) {
- FLAGS_filtered_re2_min_atom_len = 3;
-
int nfail = 0;
for (int i = 0; i < arraysize(atom_tests); i++) {
FilterTestVars v;
@@ -211,8 +215,6 @@ void FindAtomIndices(const std::vector<string>& atoms,
}
TEST(FilteredRE2Test, MatchEmptyPattern) {
- FLAGS_filtered_re2_min_atom_len = 3;
-
FilterTestVars v;
AtomTest* t = &atom_tests[0];
// We are using the regexps used in one of the atom tests
@@ -231,8 +233,6 @@ TEST(FilteredRE2Test, MatchEmptyPattern) {
}
TEST(FilteredRE2Test, MatchTests) {
- FLAGS_filtered_re2_min_atom_len = 3;
-
FilterTestVars v;
AtomTest* t = &atom_tests[2];
// We are using the regexps used in one of the atom tests
diff --git a/runtests b/runtests
index ce7c18d..2852244 100755
--- a/runtests
+++ b/runtests
@@ -4,7 +4,7 @@ success=true
for i
do
printf "%-40s" $i
- if sh -c "$i >$i.log 2>&1" 2>/dev/null
+ if $($i >$i.log 2>&1) 2>/dev/null
then
echo PASS
else
diff --git a/util/benchmark.cc b/util/benchmark.cc
index 5ca715a..125bbe3 100644
--- a/util/benchmark.cc
+++ b/util/benchmark.cc
@@ -4,6 +4,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <algorithm>
#include <chrono>
@@ -80,7 +81,7 @@ static void runN(Benchmark *b, int n, int siz) {
b->fnr(n, siz);
else {
fprintf(stderr, "%s: missing function\n", b->name);
- exit(2);
+ abort();
}
if(t0 != 0)
ns += nsec() - t0;
diff --git a/util/logging.cc b/util/logging.cc
deleted file mode 100644
index 8a59862..0000000
--- a/util/logging.cc
+++ /dev/null
@@ -1,9 +0,0 @@
-// Copyright 2015 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "util/logging.h"
-
-DEFINE_int32(minloglevel, 0, // INFO
- "Messages logged at a lower level than this don't actually get "
- "logged anywhere");
diff --git a/util/logging.h b/util/logging.h
index d1044b3..e9aa446 100644
--- a/util/logging.h
+++ b/util/logging.h
@@ -8,14 +8,12 @@
// Simplified version of Google's logging.
#include <assert.h>
-#include <stdio.h> /* for fwrite */
+#include <stdio.h>
+#include <stdlib.h>
#include <ostream>
#include <sstream>
#include "util/util.h"
-#include "util/flags.h"
-
-DECLARE_int32(minloglevel);
// Debug-only checking.
#define DCHECK(condition) assert(condition)
@@ -35,9 +33,9 @@ DECLARE_int32(minloglevel);
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
-#define LOG_INFO LogMessage(__FILE__, __LINE__, 0)
-#define LOG_WARNING LogMessage(__FILE__, __LINE__, 1)
-#define LOG_ERROR LogMessage(__FILE__, __LINE__, 2)
+#define LOG_INFO LogMessage(__FILE__, __LINE__)
+#define LOG_WARNING LogMessage(__FILE__, __LINE__)
+#define LOG_ERROR LogMessage(__FILE__, __LINE__)
#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
#define LOG_QFATAL LOG_FATAL
@@ -58,17 +56,15 @@ DECLARE_int32(minloglevel);
class LogMessage {
public:
- LogMessage(const char* file, int line, int severity)
- : severity_(severity), flushed_(false) {
+ LogMessage(const char* file, int line)
+ : flushed_(false) {
stream() << file << ":" << line << ": ";
}
void Flush() {
stream() << "\n";
- if (severity_ >= re2::FLAGS_minloglevel) {
- string s = str_.str();
- size_t n = s.size();
- if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
- }
+ string s = str_.str();
+ size_t n = s.size();
+ if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
flushed_ = true;
}
~LogMessage() {
@@ -79,7 +75,6 @@ class LogMessage {
std::ostream& stream() { return str_; }
private:
- const int severity_;
bool flushed_;
std::ostringstream str_;
@@ -97,7 +92,7 @@ class LogMessage {
class LogMessageFatal : public LogMessage {
public:
LogMessageFatal(const char* file, int line)
- : LogMessage(file, line, 3) {}
+ : LogMessage(file, line) {}
~LogMessageFatal() {
Flush();
abort();
diff --git a/util/mix.h b/util/mix.h
index b36417c..d85c172 100644
--- a/util/mix.h
+++ b/util/mix.h
@@ -10,6 +10,13 @@
namespace re2 {
+// Silence "truncation of constant value" warning for kMul in 32-bit mode.
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4309)
+#endif
+
class HashMix {
public:
HashMix() : hash_(1) {}
@@ -25,6 +32,10 @@ class HashMix {
size_t hash_;
};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
} // namespace re2
#endif // UTIL_MIX_H_
diff --git a/util/pcre.cc b/util/pcre.cc
index 2d0f5df..fab0388 100644
--- a/util/pcre.cc
+++ b/util/pcre.cc
@@ -21,6 +21,13 @@
#include "util/pcre.h"
#include "util/strutil.h"
+// Silence warnings about the wacky formatting in the operator() functions.
+// Note that we test for Clang first because it defines __GNUC__ as well.
+#if defined(__clang__)
+#elif defined(__GNUC__) && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wmisleading-indentation"
+#endif
+
#define PCREPORT(level) LOG(level)
// Default PCRE limits.
@@ -730,10 +737,14 @@ int PCRE::NumberOfCapturingGroups() const {
if (re_partial_ == NULL) return -1;
int result;
- CHECK(pcre_fullinfo(re_partial_, // The regular expression object
- NULL, // We did not study the pattern
- PCRE_INFO_CAPTURECOUNT,
- &result) == 0);
+ int rc = pcre_fullinfo(re_partial_, // The regular expression object
+ NULL, // We did not study the pattern
+ PCRE_INFO_CAPTURECOUNT,
+ &result);
+ if (rc != 0) {
+ PCREPORT(ERROR) << "Unexpected return code: " << rc;
+ return -1;
+ }
return result;
}
diff --git a/util/sparse_array.h b/util/sparse_array.h
index 3b651cd..b9853a7 100644
--- a/util/sparse_array.h
+++ b/util/sparse_array.h
@@ -94,6 +94,7 @@
//
// A moved-from SparseArray will be empty.
+#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
@@ -101,9 +102,6 @@
#include <utility>
#include <vector>
-#include "util/util.h"
-#include "util/logging.h"
-
namespace re2 {
template<typename Value>
@@ -121,10 +119,10 @@ class SparseArray {
typedef typename std::vector<IndexValue>::const_iterator const_iterator;
SparseArray(const SparseArray& src);
- SparseArray(SparseArray&& src) noexcept;
+ SparseArray(SparseArray&& src) /*noexcept*/;
SparseArray& operator=(const SparseArray& src);
- SparseArray& operator=(SparseArray&& src) noexcept;
+ SparseArray& operator=(SparseArray&& src) /*noexcept*/;
const IndexValue& iv(int i) const;
@@ -275,16 +273,14 @@ class SparseArray {
iterator SetInternal(bool allow_overwrite, int i, U&& v) { // NOLINT
DebugCheckInvariants();
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
- LOG(DFATAL) << "(jyasskin) Illegal index " << i
- << " passed to SparseArray(" << max_size_
- << ").set" << (allow_overwrite?"":"_new") << "().";
+ assert(!"illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
// dereferencing an invalid pointer.
return begin();
}
if (!allow_overwrite) {
- DCHECK(!has_index(i));
+ assert(!has_index(i));
create_index(i);
} else {
if (!has_index(i))
@@ -296,7 +292,7 @@ class SparseArray {
template <typename U>
iterator SetExistingInternal(int i, U&& v) { // NOLINT
DebugCheckInvariants();
- DCHECK(has_index(i));
+ assert(has_index(i));
dense_[sparse_to_dense_[i]].value() = std::forward<U>(v);
DebugCheckInvariants();
return dense_.begin() + sparse_to_dense_[i];
@@ -333,7 +329,7 @@ SparseArray<Value>::SparseArray(const SparseArray& src)
}
template<typename Value>
-SparseArray<Value>::SparseArray(SparseArray&& src) noexcept // NOLINT
+SparseArray<Value>::SparseArray(SparseArray&& src) /*noexcept*/ // NOLINT
: size_(src.size_),
max_size_(src.max_size_),
sparse_to_dense_(std::move(src.sparse_to_dense_)),
@@ -356,7 +352,7 @@ SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
template<typename Value>
SparseArray<Value>& SparseArray<Value>::operator=(
- SparseArray&& src) noexcept { // NOLINT
+ SparseArray&& src) /*noexcept*/ { // NOLINT
size_ = src.size_;
max_size_ = src.max_size_;
sparse_to_dense_ = std::move(src.sparse_to_dense_);
@@ -382,9 +378,9 @@ class SparseArray<Value>::IndexValue {
int index() const { return index_; }
- Value& value() & { return second; }
- const Value& value() const & { return second; }
- Value&& value() && { return std::move(second); } // NOLINT
+ Value& value() /*&*/ { return second; }
+ const Value& value() const /*&*/ { return second; }
+ //Value&& value() /*&&*/ { return std::move(second); } // NOLINT
private:
int index_;
@@ -400,8 +396,8 @@ class SparseArray<Value>::IndexValue {
template<typename Value>
const typename SparseArray<Value>::IndexValue&
SparseArray<Value>::iv(int i) const {
- DCHECK_GE(i, 0);
- DCHECK_LT(i, size_);
+ assert(i >= 0);
+ assert(i < size_);
return dense_[i];
}
@@ -435,8 +431,8 @@ void SparseArray<Value>::resize(int max_size) {
// Check whether index i is in the array.
template<typename Value>
bool SparseArray<Value>::has_index(int i) const {
- DCHECK_GE(i, 0);
- DCHECK_LT(i, max_size_);
+ assert(i >= 0);
+ assert(i < max_size_);
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
return false;
}
@@ -447,7 +443,7 @@ bool SparseArray<Value>::has_index(int i) const {
template<typename Value>
const Value& SparseArray<Value>::get_existing(int i) const {
- DCHECK(has_index(i));
+ assert(has_index(i));
return dense_[sparse_to_dense_[i]].second;
}
@@ -462,7 +458,7 @@ void SparseArray<Value>::erase(int i) {
template<typename Value>
void SparseArray<Value>::erase_existing(int i) {
DebugCheckInvariants();
- DCHECK(has_index(i));
+ assert(has_index(i));
int di = sparse_to_dense_[i];
if (di < size_ - 1) {
dense_[di] = std::move(dense_[size_ - 1]);
@@ -474,8 +470,8 @@ void SparseArray<Value>::erase_existing(int i) {
template<typename Value>
void SparseArray<Value>::create_index(int i) {
- DCHECK(!has_index(i));
- DCHECK_LT(size_, max_size_);
+ assert(!has_index(i));
+ assert(size_ < max_size_);
sparse_to_dense_[i] = size_;
dense_[size_].index_ = i;
size_++;
@@ -502,9 +498,9 @@ template<typename Value> SparseArray<Value>::~SparseArray() {
}
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
- DCHECK_LE(0, size_);
- DCHECK_LE(size_, max_size_);
- DCHECK(size_ == 0 || sparse_to_dense_ != NULL);
+ assert(0 <= size_);
+ assert(size_ <= max_size_);
+ assert(size_ == 0 || sparse_to_dense_ != NULL);
}
// Comparison function for sorting.
diff --git a/util/sparse_set.h b/util/sparse_set.h
index c52ab74..b431643 100644
--- a/util/sparse_set.h
+++ b/util/sparse_set.h
@@ -47,6 +47,7 @@
//
// See sparse_array.h for implementation details.
+#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
@@ -54,9 +55,6 @@
#include <utility>
#include <vector>
-#include "util/util.h"
-#include "util/logging.h"
-
namespace re2 {
template<typename Value>
@@ -134,16 +132,14 @@ class SparseSetT {
iterator InsertInternal(bool allow_existing, int i) {
DebugCheckInvariants();
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
- LOG(DFATAL) << "(jyasskin) Illegal index " << i
- << " passed to SparseSet(" << max_size_
- << ").insert" << (allow_existing?"":"_new") << "().";
+ assert(!"illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
// dereferencing an invalid pointer.
return begin();
}
if (!allow_existing) {
- DCHECK(!contains(i));
+ assert(!contains(i));
create_index(i);
} else {
if (!contains(i))
@@ -203,8 +199,8 @@ void SparseSetT<Value>::resize(int max_size) {
// Check whether index i is in the set.
template<typename Value>
bool SparseSetT<Value>::contains(int i) const {
- DCHECK_GE(i, 0);
- DCHECK_LT(i, max_size_);
+ assert(i >= 0);
+ assert(i < max_size_);
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
return false;
}
@@ -215,8 +211,8 @@ bool SparseSetT<Value>::contains(int i) const {
template<typename Value>
void SparseSetT<Value>::create_index(int i) {
- DCHECK(!contains(i));
- DCHECK_LT(size_, max_size_);
+ assert(!contains(i));
+ assert(size_ < max_size_);
sparse_to_dense_[i] = size_;
dense_[size_] = i;
size_++;
@@ -243,9 +239,9 @@ template<typename Value> SparseSetT<Value>::~SparseSetT() {
}
template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
- DCHECK_LE(0, size_);
- DCHECK_LE(size_, max_size_);
- DCHECK(size_ == 0 || sparse_to_dense_ != NULL);
+ assert(0 <= size_);
+ assert(size_ <= max_size_);
+ assert(size_ == 0 || sparse_to_dense_ != NULL);
}
// Comparison function for sorting.