diff options
-rw-r--r-- | BUILD | 28 | ||||
-rw-r--r-- | Makefile | 50 | ||||
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | re2.pc | 4 | ||||
-rw-r--r-- | re2/bitstate.cc | 16 | ||||
-rw-r--r-- | re2/compile.cc | 4 | ||||
-rw-r--r-- | re2/dfa.cc | 57 | ||||
-rw-r--r-- | re2/nfa.cc | 80 | ||||
-rw-r--r-- | re2/onepass.cc | 2 | ||||
-rw-r--r-- | re2/prog.cc | 12 | ||||
-rw-r--r-- | re2/prog.h | 8 | ||||
-rw-r--r-- | re2/testing/regexp_benchmark.cc | 40 | ||||
-rw-r--r-- | re2_test.bzl | 7 |
13 files changed, 199 insertions, 111 deletions
@@ -61,13 +61,12 @@ cc_library( "re2/variadic_function.h", ], copts = ["-pthread"], - includes = ["."], linkopts = ["-pthread"], visibility = ["//visibility:public"], ) cc_library( - name = "test", + name = "testing", testonly = 1, srcs = [ "re2/testing/backtrack.cc", @@ -79,7 +78,6 @@ cc_library( "re2/testing/tester.cc", "util/pcre.cc", "util/random.cc", - "util/test.cc", "util/thread.cc", ], hdrs = [ @@ -87,15 +85,21 @@ cc_library( "re2/testing/regexp_generator.h", "re2/testing/string_generator.h", "re2/testing/tester.h", + "util/benchmark.h", "util/pcre.h", "util/random.h", "util/test.h", "util/thread.h", ], - includes = ["."], deps = [":re2"], ) +cc_library( + name = "test", + srcs = ["util/test.cc"], + deps = [":testing"], +) + load("re2_test", "re2_test") re2_test("charclass_test") @@ -156,4 +160,18 @@ re2_test( size = "large", ) -# TODO: Add support for regexp_benchmark. +cc_library( + name = "benchmark", + srcs = ["util/benchmark.cc"], + deps = [":testing"], +) + +cc_binary( + name = "regexp_benchmark", + srcs = ["re2/testing/regexp_benchmark.cc"], + linkopts = [ + "-lm", + "-lrt", + ], + deps = [":benchmark"], +) @@ -13,9 +13,12 @@ # LDPCRE=-L/usr/local/lib -lpcre CXX?=g++ -CXXFLAGS?=-std=c++11 -O3 -g -pthread # can override -RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE) # required -LDFLAGS?=-pthread $(LDICU) +# can override +CXXFLAGS?=-O3 -g +LDFLAGS?= +# required +RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE) +RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE) AR?=ar ARFLAGS?=rsc NM?=nm @@ -44,17 +47,17 @@ ifeq ($(shell uname),Darwin) SOEXT=dylib SOEXTVER=$(SONAME).$(SOEXT) SOEXTVER00=$(SONAME).0.0.$(SOEXT) -MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-install_name,@rpath/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(LDFLAGS) +MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-install_name,@rpath/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS) else ifeq ($(shell uname),SunOS) SOEXT=so SOEXTVER=$(SOEXT).$(SONAME) SOEXTVER00=$(SOEXT).$(SONAME).0.0 -MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),-M,libre2.symbols $(LDFLAGS) +MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),-M,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS) else SOEXT=so SOEXTVER=$(SOEXT).$(SONAME) SOEXTVER00=$(SOEXT).$(SONAME).0.0 -MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(LDFLAGS) +MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS) endif all: obj/libre2.a obj/so/libre2.$(SOEXT) @@ -173,15 +176,15 @@ DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS)) obj/%.o: %.cc $(HFILES) @mkdir -p $$(dirname $@) - $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc + $(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc obj/dbg/%.o: %.cc $(HFILES) @mkdir -p $$(dirname $@) - $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) $*.cc + $(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc obj/so/%.o: %.cc $(HFILES) @mkdir -p $$(dirname $@) - $(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc + $(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc obj/libre2.a: $(OFILES) @mkdir -p obj @@ -198,20 +201,20 @@ obj/so/libre2.$(SOEXT): $(SOFILES) obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o @mkdir -p obj/dbg/test - $(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(LDFLAGS) $(LDPCRE) + $(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o @mkdir -p obj/test - $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(LDFLAGS) $(LDPCRE) + $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) # Test the shared lib, falling back to the static lib for private symbols obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o @mkdir -p obj/so/test - $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(LDFLAGS) $(LDPCRE) + $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o @mkdir -p obj/test - $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(LDFLAGS) $(LDPCRE) + $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) ifdef REBUILD_TABLES re2/perl_groups.cc: re2/make_perl_groups.pl @@ -272,18 +275,31 @@ install: obj/libre2.a obj/so/libre2.$(SOEXT) ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT) sed -e "s#@prefix@#${prefix}#" re2.pc >$(DESTDIR)$(libdir)/pkgconfig/re2.pc -testinstall: +testinstall: static-testinstall shared-testinstall + @echo + @echo Install tests passed. + @echo + +static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS) +static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS) +static-testinstall: @mkdir -p obj - cp testinstall.cc obj + @cp testinstall.cc obj ifeq ($(shell uname),Darwin) @echo Skipping test for libre2.a on Darwin. else ifeq ($(shell uname),SunOS) @echo Skipping test for libre2.a on SunOS. else - (cd obj && $(CXX) $(CXXFLAGS) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -l:libre2.a -o testinstall) + (cd obj && $(CXX) testinstall.cc -o testinstall $(CXXFLAGS) $(LDFLAGS)) obj/testinstall endif - (cd obj && $(CXX) $(CXXFLAGS) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -o testinstall) + +shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS) +shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS) +shared-testinstall: + @mkdir -p obj + @cp testinstall.cc obj + (cd obj && $(CXX) testinstall.cc -o testinstall $(CXXFLAGS) $(LDFLAGS)) LD_LIBRARY_PATH=$(DESTDIR)$(libdir) obj/testinstall benchlog: obj/test/regexp_benchmark @@ -34,4 +34,4 @@ A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM. An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM. A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN. A Python wrapper is at https://github.com/facebook/pyre2/. -A Ruby wrapper is at https://github.com/axic/rre2/. +A Ruby wrapper is at https://github.com/mudge/re2/. @@ -6,5 +6,5 @@ libdir=${exec_prefix}/lib Name: re2 Description: RE2 is a fast, safe, thread-friendly regular expression engine. Version: 0.0.0 -Cflags: -I${includedir} -pthread -Libs: -L${libdir} -lre2 -pthread +Cflags: -std=c++11 -pthread -I${includedir} +Libs: -pthread -L${libdir} -lre2 diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 661a8ca..53fa219 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -345,6 +345,15 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, return TrySearch(prog_->start(), text.begin()); } + // Shallow first byte analysis. We could go deeper. + int fb = -1; + Prog::Inst* ip = prog_->inst(prog_->start()); + if (ip->opcode() == kInstByteRange && + ip->lo() == ip->hi() && + !(ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z') && + ip->last()) + fb = ip->lo(); + // Unanchored search, starting from each possible text position. // Notice that we have to try the empty string at the end of // the text, so the loop condition is p <= text.end(), not p < text.end(). @@ -352,6 +361,13 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // but we are not clearing visited_ between calls to TrySearch, // so no work is duplicated and it ends up still being linear. for (const char* p = text.begin(); p <= text.end(); p++) { + // Try to use memchr to find the first byte quickly. + if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) { + p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); + if (p == NULL) + p = text.end(); + } + cap_[0] = p; if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. return true; diff --git a/re2/compile.cc b/re2/compile.cc index df859ab..334ee4c 100644 --- a/re2/compile.cc +++ b/re2/compile.cc @@ -409,7 +409,6 @@ Frag Compiler::ByteRange(int lo, int hi, bool foldcase) { if (id < 0) return NoMatch(); inst_[id].InitByteRange(lo, hi, foldcase, 0); - prog_->byte_inst_count_++; prog_->MarkByteRange(lo, hi); if (foldcase && lo <= 'z' && hi >= 'a') { if (lo < 'a') @@ -604,9 +603,6 @@ int Compiler::AddSuffixRecursive(int root, int id) { inst_[f.begin].set_out(br); } - // We just saved one ByteRange instruction. :) - prog_->byte_inst_count_--; - int out = inst_[id].out(); if (!IsCachedRuneByteSuffix(id)) { // The head should be the instruction most recently allocated, so free it @@ -441,8 +441,12 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str()); int nmark = 0; if (kind_ == Prog::kLongestMatch) - nmark = prog->size(); - nastack_ = 2 * prog->size() + nmark; + nmark = prog_->size(); + // See DFA::AddToQueue() for why this is so. + nastack_ = prog_->inst_count(kInstCapture) + + prog_->inst_count(kInstEmptyWidth) + + prog_->inst_count(kInstNop) + + nmark + 1; // + 1 for start inst // Account for space needed for DFA, q0, q1, astack. mem_budget_ -= sizeof(DFA); @@ -462,9 +466,11 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) // At minimum, the search requires room for two states in order // to limp along, restarting frequently. We'll get better performance // if there is room for a larger number of states, say 20. + // Note that a state stores list heads only, so we use the program + // list count for the upper bound, not the program size. int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot int64 one_state = sizeof(State) + (nnext-1)*sizeof(std::atomic<State*>) + - (prog_->size()+nmark)*sizeof(int); + (prog_->list_count()+nmark)*sizeof(int); if (state_budget_ < 20*one_state) { LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld", prog_->size(), max_mem); @@ -472,8 +478,8 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) return; } - q0_ = new Workq(prog->size(), nmark); - q1_ = new Workq(prog->size(), nmark); + q0_ = new Workq(prog_->size(), nmark); + q1_ = new Workq(prog_->size(), nmark); astack_ = new int[nastack_]; } @@ -791,12 +797,15 @@ void DFA::StateToWorkq(State* s, Workq* q) { // Adds ip to the work queue, following empty arrows according to flag. void DFA::AddToQueue(Workq* q, int id, uint flag) { - // Use astack_ to hold our stack of states yet to process. - // It is sized to have room for nastack_ == 2*prog->size() + nmark - // instructions, which is enough: each instruction can be - // processed by the switch below only once, and the processing - // pushes at most two instructions plus maybe a mark. - // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.) + // Use astack_ to hold our stack of instructions yet to process. + // It was preallocated as follows: + // one entry per Capture; + // one entry per EmptyWidth; and + // one entry per Nop. + // This reflects the maximum number of stack pushes that each can + // perform. (Each instruction can be processed at most once.) + // When using marks, we also added nmark == prog_->size(). + // (Otherwise, nmark == 0.) int* stk = astack_; int nstk = 0; @@ -805,6 +814,7 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) { DCHECK_LE(nstk, nastack_); id = stk[--nstk]; + Loop: if (id == Mark) { q->mark(); continue; @@ -831,9 +841,10 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) { case kInstByteRange: // just save these on the queue case kInstMatch: - if (!ip->last()) - stk[nstk++] = id+1; - break; + if (ip->last()) + break; + id = id+1; + goto Loop; case kInstCapture: // DFA treats captures as no-ops. case kInstNop: @@ -847,13 +858,13 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) { if (ip->opcode() == kInstNop && q->maxmark() > 0 && id == prog_->start_unanchored() && id != prog_->start()) stk[nstk++] = Mark; - stk[nstk++] = ip->out(); - break; + id = ip->out(); + goto Loop; case kInstAltMatch: DCHECK(!ip->last()); - stk[nstk++] = id+1; - break; + id = id+1; + goto Loop; case kInstEmptyWidth: if (!ip->last()) @@ -862,8 +873,8 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) { // Continue on if we have all the right flag bits. if (ip->empty() & ~flag) break; - stk[nstk++] = ip->out(); - break; + id = ip->out(); + goto Loop; } } } @@ -1783,11 +1794,11 @@ bool DFA::Search(const StringPiece& text, // This is a separate function so that // prog.h can be used without moving the definition of // class DFA out of this file. If you (atomically) set -// prog->dfa_first_ = dfa; +// prog_->dfa_first_ = dfa; // or -// prog->dfa_longest_ = dfa; +// prog_->dfa_longest_ = dfa; // then you also have to set -// prog->delete_dfa_ = DeleteDFA; +// prog_->delete_dfa_ = DeleteDFA; // so that ~Prog can delete the DFA. static void DeleteDFA(std::atomic<DFA*>* pdfa) { DFA* dfa = pdfa->load(std::memory_order_relaxed); @@ -55,10 +55,7 @@ class NFA { private: struct Thread { - union { - int id; - Thread* next; // when on free list - }; + Thread* next; // when on free list const char** capture; }; @@ -127,7 +124,7 @@ class NFA { NFA::NFA(Prog* prog) { prog_ = prog; - start_ = prog->start(); + start_ = prog_->start(); ncapture_ = 0; longest_ = false; endmatch_ = false; @@ -135,7 +132,10 @@ NFA::NFA(Prog* prog) { etext_ = NULL; q0_.resize(prog_->size()); q1_.resize(prog_->size()); - nastack_ = 2*prog_->size(); + // See NFA::AddToThreadq() for why this is so. + nastack_ = 2*prog_->inst_count(kInstCapture) + + prog_->inst_count(kInstEmptyWidth) + + prog_->inst_count(kInstNop) + 1; // + 1 for start inst astack_ = new AddState[nastack_]; match_ = NULL; matched_ = false; @@ -188,18 +188,22 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, if (id0 == 0) return; - // Astack_ is pre-allocated to avoid resize operations. - // It has room for 2*prog_->size() entries, which is enough: - // Each inst in prog can be processed at most once, - // pushing at most two entries on stk. - - int nstk = 0; + // Use astack_ to hold our stack of instructions yet to process. + // It was preallocated as follows: + // two entries per Capture; + // one entry per EmptyWidth; and + // one entry per Nop. + // This reflects the maximum number of stack pushes that each can + // perform. (Each instruction can be processed at most once.) AddState* stk = astack_; - stk[nstk++] = AddState(id0); + int nstk = 0; + stk[nstk++] = AddState(id0); while (nstk > 0) { DCHECK_LE(nstk, nastack_); - const AddState& a = stk[--nstk]; + AddState a = stk[--nstk]; + + Loop: if (a.j >= 0) capture[a.j] = a.cap_j; @@ -230,23 +234,22 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, break; case kInstAltMatch: - DCHECK(!ip->last()); - stk[nstk++] = AddState(id+1); - // Save state; will pick up at next byte. t = AllocThread(); - t->id = id; CopyCapture(t->capture, capture); *tp = t; - break; + + DCHECK(!ip->last()); + a = AddState(id+1); + goto Loop; case kInstNop: if (!ip->last()) stk[nstk++] = AddState(id+1); // Continue on. - stk[nstk++] = AddState(ip->out()); - break; + a = AddState(ip->out()); + goto Loop; case kInstCapture: if (!ip->last()) @@ -260,22 +263,22 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, // Record capture. capture[j] = p; } - stk[nstk++] = AddState(ip->out()); - break; + a = AddState(ip->out()); + goto Loop; case kInstMatch: case kInstByteRange: - if (!ip->last()) - stk[nstk++] = AddState(id+1); - // Save state; will pick up at next byte. t = AllocThread(); - t->id = id; CopyCapture(t->capture, capture); *tp = t; if (Debug) fprintf(stderr, " + %d%s [%p]\n", id, FormatCapture(t->capture).c_str(), t); - break; + + if (ip->last()) + break; + a = AddState(id+1); + goto Loop; case kInstEmptyWidth: if (!ip->last()) @@ -284,8 +287,8 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, // Continue on if we have all the right flag bits. if (ip->empty() & ~flag) break; - stk[nstk++] = AddState(ip->out()); - break; + a = AddState(ip->out()); + goto Loop; } } } @@ -314,7 +317,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { } } - int id = t->id; + int id = i->index(); Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { @@ -333,7 +336,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { break; // The match is ours if we want it. if (ip->greedy(prog_) || longest_) { - CopyCapture((const char**)match_, t->capture); + CopyCapture(match_, t->capture); FreeThread(t); for (++i; i != runq->end(); ++i) FreeThread(i->second); @@ -349,7 +352,6 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { if (endmatch_ && p != etext_) break; - const char* old = t->capture[1]; // previous end pointer t->capture[1] = p; if (longest_) { // Leftmost-longest mode: save this match only if @@ -357,16 +359,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { // point but longer than an existing match. if (!matched_ || t->capture[0] < match_[0] || (t->capture[0] == match_[0] && t->capture[1] > match_[1])) - CopyCapture((const char**)match_, t->capture); + CopyCapture(match_, t->capture); } else { // Leftmost-biased mode: this match is by definition // better than what we've already found (see next line). - CopyCapture((const char**)match_, t->capture); + CopyCapture(match_, t->capture); // Cut off the threads that can only find matches // worse than the one we just found: don't run the // rest of the current Threadq. - t->capture[0] = old; FreeThread(t); for (++i; i != runq->end(); ++i) FreeThread(i->second); @@ -374,7 +375,6 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { matched_ = true; return 0; } - t->capture[0] = old; matched_ = true; break; } @@ -513,8 +513,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, Thread* t = i->second; if (t == NULL) continue; - fprintf(stderr, " %d%s", t->id, - FormatCapture((const char**)t->capture).c_str()); + fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str()); } fprintf(stderr, "\n"); } @@ -581,11 +580,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, flag = Prog::EmptyFlags(context, p); } - // Steal match storage (cleared but unused as of yet) - // temporarily to hold match boundaries for new thread. match_[0] = p; AddToThreadq(runq, start_, flag, p, match_); - match_[0] = NULL; } // If all the threads have died, stop early. diff --git a/re2/onepass.cc b/re2/onepass.cc index 977ed96..977b0e7 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -384,7 +384,7 @@ bool Prog::IsOnePass() { // Willing to use at most 1/4 of the DFA budget (heuristic). // Limit max node count to 65000 as a conservative estimate to // avoid overflowing 16-bit node index in encoding. - int maxnodes = 2 + byte_inst_count_; + int maxnodes = 2 + inst_count(kInstByteRange); int statesize = sizeof(OneState) + (bytemap_range_-1)*sizeof(uint32); if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes) return false; diff --git a/re2/prog.cc b/re2/prog.cc index b5f48ef..15f10e1 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -98,7 +98,6 @@ Prog::Prog() start_(0), start_unanchored_(0), size_(0), - byte_inst_count_(0), bytemap_range_(0), flags_(0), onepass_statesize_(0), @@ -383,13 +382,24 @@ void Prog::Flatten() { flat.back().set_last(); } + list_count_ = flatmap.size(); + for (int i = 0; i < kNumInst; i++) + inst_count_[i] = 0; + // Third pass: Remaps outs to flat-ids. + // Counts instructions by opcode. for (int id = 0; id < static_cast<int>(flat.size()); id++) { Inst* ip = &flat[id]; if (ip->opcode() != kInstAltMatch) // handled in EmitList() ip->set_out(flatmap[ip->out()]); + inst_count_[ip->opcode()]++; } + int total = 0; + for (int i = 0; i < kNumInst; i++) + total += inst_count_[i]; + DCHECK_EQ(total, static_cast<int>(flat.size())); + // Remap start_unanchored and start. if (start_unanchored() == 0) { DCHECK_EQ(start(), 0); @@ -58,6 +58,7 @@ enum InstOp { kInstMatch, // found a match! kInstNop, // no-op; occasionally unavoidable kInstFail, // never match; occasionally unavoidable + kNumInst, }; // Bit flags for empty-width specials @@ -215,7 +216,8 @@ class Prog { int size() { return size_; } bool reversed() { return reversed_; } void set_reversed(bool reversed) { reversed_ = reversed; } - int byte_inst_count() { return byte_inst_count_; } + int list_count() { return list_count_; } + int inst_count(InstOp op) { return inst_count_[op]; } const Bitmap<256>& byterange() { return byterange_; } void set_dfa_mem(int64 dfa_mem) { dfa_mem_ = dfa_mem; } int64 dfa_mem() { return dfa_mem_; } @@ -378,11 +380,13 @@ class Prog { int start_; // entry point for program int start_unanchored_; // unanchored entry point for program int size_; // number of instructions - int byte_inst_count_; // number of kInstByteRange instructions int bytemap_range_; // bytemap_[x] < bytemap_range_ int flags_; // regexp parse flags int onepass_statesize_; // byte size of each OneState* node + int list_count_; // count of lists (see above) + int inst_count_[kNumInst]; // count of instructions by opcode + Inst* inst_; // pointer to instruction array Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_ diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc index 8318e28..311a60e 100644 --- a/re2/testing/regexp_benchmark.cc +++ b/re2/testing/regexp_benchmark.cc @@ -174,6 +174,10 @@ void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { // figure out there's no match. #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" +// This has quite a high degree of fanout. +// NFA execution will be particularly slow. +#define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]" + // This stresses engines that are trying to track parentheses. #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \ "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" @@ -226,6 +230,18 @@ BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); #endif BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); +void Search_Fanout_CachedDFA(int i, int n) { Search(i, n, FANOUT, SearchCachedDFA); } +void Search_Fanout_CachedNFA(int i, int n) { Search(i, n, FANOUT, SearchCachedNFA); } +void Search_Fanout_CachedPCRE(int i, int n) { Search(i, n, FANOUT, SearchCachedPCRE); } +void Search_Fanout_CachedRE2(int i, int n) { Search(i, n, FANOUT, SearchCachedRE2); } + +BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); +#ifdef USEPCRE +BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); +#endif +BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); + void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCachedDFA); } void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCachedNFA); } void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCachedPCRE); } @@ -299,11 +315,13 @@ void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search // Unambiguous search (RE2 can use OnePass). void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); } +void Search_Success_NFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchNFA); } void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); } void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); } void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); } BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); #endif @@ -311,11 +329,13 @@ BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); } +void Search_Success_CachedNFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedNFA); } void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); } void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); } void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); } BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); #endif @@ -326,28 +346,32 @@ BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs( // Used to be ".*.$", but that is coalesced to ".+$" these days. void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchDFA); } +void Search_Success1_NFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchNFA); } void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchPCRE); } void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchRE2); } void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchBitState); } BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); #endif BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); -void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); } -void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); } -void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); } -void Search_Success1_Cached_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); } +void Search_Success1_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); } +void Search_Success1_CachedNFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedNFA); } +void Search_Success1_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); } +void Search_Success1_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); } +void Search_Success1_CachedBitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); } -BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); #ifdef USEPCRE -BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); #endif -BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); -BENCHMARK_RANGE(Search_Success1_Cached_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); +BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); // Benchmark: AltMatch optimisation (just to verify that it works) // Note that OnePass doesn't implement it! diff --git a/re2_test.bzl b/re2_test.bzl index 8dafbd5..94443f6 100644 --- a/re2_test.bzl +++ b/re2_test.bzl @@ -2,14 +2,11 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -# Define a bazel macro that creates cc_test for re2. +# Defines a Bazel macro that instantiates a native cc_test rule for an RE2 test. def re2_test(name, deps=[], size="medium"): native.cc_test( name=name, srcs=["re2/testing/%s.cc" % (name)], - deps=[ - ":re2", - ":test", - ] + deps, + deps=[":test"] + deps, size = size, ) |