diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:58:22 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-11-21 16:58:22 +0900 |
commit | 11b439be08e713284f09eba5547142b6ea2924f3 (patch) | |
tree | 6e8aab0313cb718030050a0e2bfe1acdb6a21dd0 | |
parent | 2e30f7d1bb7894979dafc4fcd68dd8664c5584ab (diff) | |
download | re2-11b439be08e713284f09eba5547142b6ea2924f3.tar.gz re2-11b439be08e713284f09eba5547142b6ea2924f3.tar.bz2 re2-11b439be08e713284f09eba5547142b6ea2924f3.zip |
Imported Upstream version 20160601upstream/20160601
Change-Id: I89d6c9dac409aeeb940c58029f34a1292d4cb0a1
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r-- | BUILD | 1 | ||||
-rw-r--r-- | Makefile | 32 | ||||
-rw-r--r-- | re2.pc | 6 | ||||
-rw-r--r-- | re2/bitstate.cc | 10 | ||||
-rw-r--r-- | re2/compile.cc | 42 | ||||
-rw-r--r-- | re2/dfa.cc | 23 | ||||
-rw-r--r-- | re2/fuzzing/re2_fuzzer.cc | 106 | ||||
-rw-r--r-- | re2/nfa.cc | 226 | ||||
-rw-r--r-- | re2/onepass.cc | 114 | ||||
-rw-r--r-- | re2/prog.cc | 131 | ||||
-rw-r--r-- | re2/prog.h | 23 | ||||
-rw-r--r-- | re2/re2.cc | 5 | ||||
-rw-r--r-- | re2/re2.h | 95 | ||||
-rw-r--r-- | re2/testing/compile_test.cc | 27 | ||||
-rw-r--r-- | re2/variadic_function.h | 344 | ||||
-rw-r--r-- | util/fuzz.cc | 21 | ||||
-rw-r--r-- | util/test.cc | 2 | ||||
-rw-r--r-- | util/util.h | 16 |
18 files changed, 513 insertions, 711 deletions
@@ -58,7 +58,6 @@ cc_library( "re2/re2.h", "re2/set.h", "re2/stringpiece.h", - "re2/variadic_function.h", ], copts = ["-pthread"], linkopts = ["-pthread"], @@ -28,13 +28,20 @@ NMFLAGS?=-p # http://www.gnu.org/prep/standards/standards.html prefix=/usr/local exec_prefix=$(prefix) -bindir=$(exec_prefix)/bin includedir=$(prefix)/include libdir=$(exec_prefix)/lib INSTALL=install -INSTALL_PROGRAM=$(INSTALL) INSTALL_DATA=$(INSTALL) -m 644 +# Work around the weirdness of sed(1) on Darwin. :/ +ifeq ($(shell uname),Darwin) +SED_INPLACE=sed -i '' +else ifeq ($(shell uname),SunOS) +SED_INPLACE=sed -i +else +SED_INPLACE=sed -i +endif + # ABI version # http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html SONAME=0 @@ -43,6 +50,7 @@ SONAME=0 # access for Unicode data), uncomment the following line: # REBUILD_TABLES=1 +# The SunOS linker does not support wildcards. :( ifeq ($(shell uname),Darwin) SOEXT=dylib SOEXTVER=$(SONAME).$(SOEXT) @@ -52,7 +60,7 @@ else ifeq ($(shell uname),SunOS) SOEXT=so SOEXTVER=$(SOEXT).$(SONAME) SOEXTVER00=$(SOEXT).$(SONAME).0.0 -MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),-M,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS) +MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER) $(RE2_LDFLAGS) $(LDFLAGS) else SOEXT=so SOEXTVER=$(SOEXT).$(SONAME) @@ -67,7 +75,6 @@ INSTALL_HFILES=\ re2/re2.h\ re2/set.h\ re2/stringpiece.h\ - re2/variadic_function.h\ HFILES=\ util/benchmark.h\ @@ -97,7 +104,6 @@ HFILES=\ re2/testing/tester.h\ re2/unicode_casefold.h\ re2/unicode_groups.h\ - re2/variadic_function.h\ re2/walker-inl.h\ OFILES=\ @@ -216,6 +222,14 @@ obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TES @mkdir -p obj/test $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) +# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing +# is simply a way to check that the target builds and then to run it against a +# fixed set of inputs. To perform real fuzzing, refer to the documentation for +# libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/). +obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o + @mkdir -p obj/test + $(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) + ifdef REBUILD_TABLES re2/perl_groups.cc: re2/make_perl_groups.pl perl $< > $@ @@ -266,6 +280,8 @@ shared-bigtest: $(STESTS) $(SBIGTESTS) benchmark: obj/test/regexp_benchmark +fuzz: obj/test/re2_fuzzer + install: obj/libre2.a obj/so/libre2.$(SOEXT) mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig $(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2 @@ -273,7 +289,11 @@ install: obj/libre2.a obj/so/libre2.$(SOEXT) $(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00) ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER) ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT) - sed -e "s#@prefix@#${prefix}#" re2.pc >$(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(SED_INPLACE) -e "s#@prefix@#${prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(SED_INPLACE) -e "s#@exec_prefix@#${exec_prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(SED_INPLACE) -e "s#@includedir@#${includedir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(SED_INPLACE) -e "s#@libdir@#${libdir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc testinstall: static-testinstall shared-testinstall @echo @@ -1,7 +1,7 @@ prefix=@prefix@ -exec_prefix=${prefix} -includedir=${prefix}/include -libdir=${exec_prefix}/lib +exec_prefix=@exec_prefix@ +includedir=@includedir@ +libdir=@libdir@ Name: re2 Description: RE2 is a fast, safe, thread-friendly regular expression engine. diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 53fa219..775fbec 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -345,15 +345,6 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, return TrySearch(prog_->start(), text.begin()); } - // Shallow first byte analysis. We could go deeper. - int fb = -1; - Prog::Inst* ip = prog_->inst(prog_->start()); - if (ip->opcode() == kInstByteRange && - ip->lo() == ip->hi() && - !(ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z') && - ip->last()) - fb = ip->lo(); - // Unanchored search, starting from each possible text position. // Notice that we have to try the empty string at the end of // the text, so the loop condition is p <= text.end(), not p < text.end(). @@ -362,6 +353,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // so no work is duplicated and it ends up still being linear. for (const char* p = text.begin(); p <= text.end(); p++) { // Try to use memchr to find the first byte quickly. + int fb = prog_->first_byte(); if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) { p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); if (p == NULL) diff --git a/re2/compile.cc b/re2/compile.cc index 334ee4c..117679f 100644 --- a/re2/compile.cc +++ b/re2/compile.cc @@ -178,9 +178,6 @@ class Compiler : public Regexp::Walker<Frag> { // Returns -1 if no more instructions are available. int AllocInst(int n); - // Deletes unused instructions. - void Trim(); - // Rune range compiler. // Begins a new alternation. @@ -291,16 +288,6 @@ int Compiler::AllocInst(int n) { return id; } -void Compiler::Trim() { - if (inst_len_ < inst_cap_) { - Prog::Inst* ip = new Prog::Inst[inst_len_]; - memmove(ip, inst_, inst_len_ * sizeof ip[0]); - delete[] inst_; - inst_ = ip; - inst_cap_ = inst_len_; - } -} - // These routines are somewhat hard to visualize in text -- // see http://swtch.com/~rsc/regexp/regexp1.html for // pictures explaining what is going on here. @@ -409,15 +396,6 @@ Frag Compiler::ByteRange(int lo, int hi, bool foldcase) { if (id < 0) return NoMatch(); inst_[id].InitByteRange(lo, hi, foldcase, 0); - prog_->MarkByteRange(lo, hi); - if (foldcase && lo <= 'z' && hi >= 'a') { - if (lo < 'a') - lo = 'a'; - if (hi > 'z') - hi = 'z'; - if (lo <= hi) - prog_->MarkByteRange(lo + 'A' - 'a', hi + 'A' - 'a'); - } return Frag(id, PatchList::Mk(id << 1)); } @@ -445,19 +423,6 @@ Frag Compiler::EmptyWidth(EmptyOp empty) { if (id < 0) return NoMatch(); inst_[id].InitEmptyWidth(empty, 0); - if (empty & (kEmptyBeginLine|kEmptyEndLine)) - prog_->MarkByteRange('\n', '\n'); - if (empty & (kEmptyWordBoundary|kEmptyNonWordBoundary)) { - int j; - for (int i = 0; i < 256; i = j) { - for (j = i + 1; j < 256 && - Prog::IsWordChar(static_cast<uint8>(i)) == - Prog::IsWordChar(static_cast<uint8>(j)); - j++) - ; - prog_->MarkByteRange(i, j-1); - } - } return Frag(id, PatchList::Mk(id << 1)); } @@ -1223,17 +1188,14 @@ Prog* Compiler::Finish() { inst_len_ = 1; } - // Trim instruction to minimum array and transfer to Prog. - Trim(); + // Hand off the array to Prog. prog_->inst_ = inst_; prog_->size_ = inst_len_; inst_ = NULL; - // Compute byte map. - prog_->ComputeByteMap(); - prog_->Optimize(); prog_->Flatten(); + prog_->ComputeByteMap(); // Record remaining memory for DFA. if (max_mem_ <= 0) { @@ -1789,23 +1789,6 @@ bool DFA::Search(const StringPiece& text, return ret; } -// Deletes dfa. -// -// This is a separate function so that -// prog.h can be used without moving the definition of -// class DFA out of this file. If you (atomically) set -// prog_->dfa_first_ = dfa; -// or -// prog_->dfa_longest_ = dfa; -// then you also have to set -// prog_->delete_dfa_ = DeleteDFA; -// so that ~Prog can delete the DFA. -static void DeleteDFA(std::atomic<DFA*>* pdfa) { - DFA* dfa = pdfa->load(std::memory_order_relaxed); - if (dfa != NULL) - delete dfa; -} - DFA* Prog::GetDFA(MatchKind kind) { std::atomic<DFA*>* pdfa; if (kind == kFirstMatch || kind == kManyMatch) { @@ -1837,13 +1820,17 @@ DFA* Prog::GetDFA(MatchKind kind) { m = 0; } dfa = new DFA(this, kind, m); - delete_dfa_ = DeleteDFA; // Synchronize with "quick check" above. pdfa->store(dfa, std::memory_order_release); return dfa; } +void Prog::DeleteDFA(std::atomic<DFA*>* pdfa) { + DFA* dfa = pdfa->load(std::memory_order_relaxed); + if (dfa != NULL) + delete dfa; +} // Executes the regexp program to search in text, // which itself is inside the larger context. (As a convenience, diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc new file mode 100644 index 0000000..b4e6bb6 --- /dev/null +++ b/re2/fuzzing/re2_fuzzer.cc @@ -0,0 +1,106 @@ +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <string> + +#include "re2/re2.h" +#include "util/logging.h" + +using re2::FLAGS_minloglevel; +using re2::StringPiece; +using std::map; +using std::string; + +// NOT static, NOT signed. +uint8_t dummy = 0; + +void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { + RE2 re(pattern, options); + if (!re.ok()) + return; + + // Don't waste time fuzzing high-fanout programs. + map<int, int> histogram; + int fanout = re.ProgramFanout(&histogram); + if (fanout > 10) + return; + + StringPiece sp1, sp2, sp3, sp4; + string s1, s2, s3, s4; + int i1, i2, i3, i4; + double d1, d2, d3, d4; + + RE2::FullMatch(text, re, &sp1, &sp2, &sp3, &sp4); + RE2::PartialMatch(text, re, &s1, &s2, &s3, &s4); + + sp1 = sp2 = text; + RE2::Consume(&sp1, re, &i1, &i2, &i3, &i4); + RE2::FindAndConsume(&sp2, re, &d1, &d2, &d3, &d4); + + s3 = s4 = text.ToString(); + RE2::Replace(&s3, re, ""); + RE2::GlobalReplace(&s4, re, ""); + + // Exercise some other API functionality. + dummy += re.NumberOfCapturingGroups(); + dummy += RE2::QuoteMeta(pattern).size(); +} + +// Entry point for libFuzzer. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + if (size == 0) + return 0; + + // Suppress logging below FATAL severity. + FLAGS_minloglevel = 3; + + // The one-at-a-time hash by Bob Jenkins. + uint32_t hash = 0; + for (size_t i = 0; i < size; i++) { + hash += data[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + RE2::Options options; + options.set_log_errors(false); + options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1 + : RE2::Options::EncodingUTF8); + options.set_posix_syntax(hash & 2); + options.set_longest_match(hash & 4); + options.set_literal(hash & 8); + options.set_never_nl(hash & 16); + options.set_dot_nl(hash & 32); + options.set_never_capture(hash & 64); + options.set_case_sensitive(hash & 128); + options.set_perl_classes(hash & 256); + options.set_word_boundary(hash & 512); + options.set_one_line(hash & 1024); + + const char* ptr = reinterpret_cast<const char*>(data); + int len = static_cast<int>(size); + + StringPiece pattern(ptr, len); + StringPiece text(ptr, len); + Test(pattern, options, text); + + for (int i = 2; i <= 4; i++) { + if (len < i) + break; + + int frac = len / i; + pattern.set(ptr, frac); + text.set(ptr + frac, len - frac); + Test(pattern, options, text); + } + + return 0; +} @@ -55,22 +55,24 @@ class NFA { private: struct Thread { - Thread* next; // when on free list + union { + int ref; + Thread* next; // when on free list + }; const char** capture; }; // State for explicit stack in AddToThreadq. struct AddState { - int id; // Inst to process - int j; - const char* cap_j; // if j>=0, set capture[j] = cap_j before processing ip + int id; // Inst to process + Thread* t; // if not null, set t0 = t before processing id AddState() - : id(0), j(-1), cap_j(NULL) {} + : id(0), t(NULL) {} explicit AddState(int id) - : id(id), j(-1), cap_j(NULL) {} - AddState(int id, const char* cap_j, int j) - : id(id), j(j), cap_j(cap_j) {} + : id(id), t(NULL) {} + AddState(int id, Thread* t) + : id(id), t(t) {} }; // Threadq is a list of threads. The list is sorted by the order @@ -79,19 +81,24 @@ class NFA { typedef SparseArray<Thread*> Threadq; inline Thread* AllocThread(); - inline void FreeThread(Thread*); + inline Thread* Incref(Thread* t); + inline void Decref(Thread* t); - // Add id (or its children, following unlabeled arrows) - // to the workqueue q with associated capture info. - void AddToThreadq(Threadq* q, int id, int flag, - const char* p, const char** capture); + // Follows all empty arrows from id0 and enqueues all the states reached. + // Enqueues only the ByteRange instructions that match byte c. + // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. + // p is the current input position, and t0 is the current thread. + void AddToThreadq(Threadq* q, int id0, int c, int flag, + const char* p, Thread* t0); // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. - // p is position of the next byte (the one after c) - // in the input string, used when processing capturing parens. - // flag is the bitwise or of Bol, Eol, etc., specifying whether - // ^, $ and \b match the current input point (after c). + // p is the position of the previous byte (the one before c) + // in the input string, used when processing Match instructions. + // flag is the bitwise OR of Bol, Eol, etc., specifying whether + // ^, $ and \b match the current input position (after c). + // Frees all the threads on runq. + // If there is a shortcut to the end, returns that shortcut. inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p); // Returns text version of capture information, for debugging. @@ -99,10 +106,6 @@ class NFA { inline void CopyCapture(const char** dst, const char** src); - // Computes whether all matches must begin with the same first - // byte, and if so, returns that byte. If not, returns -1. - int ComputeFirstByte(); - Prog* prog_; // underlying program int start_; // start instruction in program int ncapture_; // number of submatches to track @@ -115,7 +118,6 @@ class NFA { bool matched_; // any match so far? AddState* astack_; // pre-allocated for AddToThreadq int nastack_; - int first_byte_; // required first byte for match, or -1 if none Thread* free_threads_; // free list @@ -140,7 +142,6 @@ NFA::NFA(Prog* prog) { match_ = NULL; matched_ = false; free_threads_ = NULL; - first_byte_ = ComputeFirstByte(); } NFA::~NFA() { @@ -154,24 +155,36 @@ NFA::~NFA() { } } -void NFA::FreeThread(Thread *t) { - if (t == NULL) - return; - t->next = free_threads_; - free_threads_ = t; -} - NFA::Thread* NFA::AllocThread() { Thread* t = free_threads_; if (t == NULL) { t = new Thread; + t->ref = 1; t->capture = new const char*[ncapture_]; return t; } free_threads_ = t->next; + t->ref = 1; + return t; +} + +NFA::Thread* NFA::Incref(Thread* t) { + DCHECK(t != NULL); + t->ref++; return t; } +void NFA::Decref(Thread* t) { + if (t == NULL) + return; + t->ref--; + if (t->ref > 0) + return; + DCHECK_EQ(t->ref, 0); + t->next = free_threads_; + free_threads_ = t; +} + void NFA::CopyCapture(const char** dst, const char** src) { for (int i = 0; i < ncapture_; i+=2) { dst[i] = src[i]; @@ -180,11 +193,11 @@ void NFA::CopyCapture(const char** dst, const char** src) { } // Follows all empty arrows from id0 and enqueues all the states reached. +// Enqueues only the ByteRange instructions that match byte c. // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. -// The pointer p is the current input position, and m is the -// current set of match boundaries. -void NFA::AddToThreadq(Threadq* q, int id0, int flag, - const char* p, const char** capture) { +// p is the current input position, and t0 is the current thread. +void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, + const char* p, Thread* t0) { if (id0 == 0) return; @@ -204,15 +217,19 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, AddState a = stk[--nstk]; Loop: - if (a.j >= 0) - capture[a.j] = a.cap_j; + if (a.t != NULL) { + // t0 was a thread that we allocated and copied in order to + // record the capture, so we must now decref it. + Decref(t0); + t0 = a.t; + } int id = a.id; if (id == 0) continue; if (q->has_index(id)) { if (Debug) - fprintf(stderr, " [%d%s]\n", id, FormatCapture(capture).c_str()); + fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str()); continue; } @@ -235,8 +252,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, case kInstAltMatch: // Save state; will pick up at next byte. - t = AllocThread(); - CopyCapture(t->capture, capture); + t = Incref(t0); *tp = t; DCHECK(!ip->last()); @@ -256,25 +272,32 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, stk[nstk++] = AddState(id+1); if ((j=ip->cap()) < ncapture_) { - // Push a dummy whose only job is to restore capture[j] + // Push a dummy whose only job is to restore t0 // once we finish exploring this possibility. - stk[nstk++] = AddState(0, capture[j], j); + stk[nstk++] = AddState(0, t0); // Record capture. - capture[j] = p; + t = AllocThread(); + CopyCapture(t->capture, t0->capture); + t->capture[j] = p; + t0 = t; } a = AddState(ip->out()); goto Loop; - case kInstMatch: case kInstByteRange: + if (!ip->Matches(c)) + goto Next; + // Fallthrough intended. + + case kInstMatch: // Save state; will pick up at next byte. - t = AllocThread(); - CopyCapture(t->capture, capture); + t = Incref(t0); *tp = t; if (Debug) - fprintf(stderr, " + %d%s [%p]\n", id, FormatCapture(t->capture).c_str(), t); + fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str()); + Next: if (ip->last()) break; a = AddState(id+1); @@ -294,11 +317,11 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag, } // Run runq on byte c, appending new states to nextq. -// Updates match as new, better matches are found. -// p is position of the byte c in the input string, -// used when processing capturing parens. -// flag is the bitwise or of Bol, Eol, etc., specifying whether -// ^, $ and \b match the current input point (after c). +// Updates matched_ and match_ as new, better matches are found. +// p is the position of the previous byte (the one before c) +// in the input string, used when processing Match instructions. +// flag is the bitwise OR of Bol, Eol, etc., specifying whether +// ^, $ and \b match the current input position (after c). // Frees all the threads on runq. // If there is a shortcut to the end, returns that shortcut. int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { @@ -312,7 +335,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { if (longest_) { // Can skip any threads started after our current best match. if (matched_ && match_[0] < t->capture[0]) { - FreeThread(t); + Decref(t); continue; } } @@ -327,8 +350,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { break; case kInstByteRange: - if (ip->Matches(c)) - AddToThreadq(nextq, ip->out(), flag, p+1, t->capture); + AddToThreadq(nextq, ip->out(), c, flag, p+1, t); break; case kInstAltMatch: @@ -337,11 +359,12 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { // The match is ours if we want it. if (ip->greedy(prog_) || longest_) { CopyCapture(match_, t->capture); - FreeThread(t); + matched_ = true; + + Decref(t); for (++i; i != runq->end(); ++i) - FreeThread(i->second); + Decref(i->second); runq->clear(); - matched_ = true; if (ip->greedy(prog_)) return ip->out1(); return ip->out(); @@ -352,33 +375,35 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { if (endmatch_ && p != etext_) break; - t->capture[1] = p; if (longest_) { // Leftmost-longest mode: save this match only if // it is either farther to the left or at the same // point but longer than an existing match. if (!matched_ || t->capture[0] < match_[0] || - (t->capture[0] == match_[0] && t->capture[1] > match_[1])) + (t->capture[0] == match_[0] && p > match_[1])) { CopyCapture(match_, t->capture); + match_[1] = p; + matched_ = true; + } } else { // Leftmost-biased mode: this match is by definition // better than what we've already found (see next line). CopyCapture(match_, t->capture); + match_[1] = p; + matched_ = true; // Cut off the threads that can only find matches // worse than the one we just found: don't run the // rest of the current Threadq. - FreeThread(t); + Decref(t); for (++i; i != runq->end(); ++i) - FreeThread(i->second); + Decref(i->second); runq->clear(); - matched_ = true; return 0; } - matched_ = true; break; } - FreeThread(t); + Decref(t); } runq->clear(); return 0; @@ -454,7 +479,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, match_ = new const char*[ncapture_]; matched_ = false; - memset(match_, 0, ncapture_*sizeof match_[0]); // For debugging prints. btext_ = context.begin(); @@ -471,14 +495,10 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, runq->clear(); nextq->clear(); memset(&match_[0], 0, ncapture_*sizeof match_[0]); - const char* bp = context.begin(); - int c = -1; int wasword = 0; - if (text.begin() > context.begin()) { - c = text.begin()[-1] & 0xFF; - wasword = Prog::IsWordChar(static_cast<uint8>(c)); - } + if (text.begin() > context.begin()) + wasword = Prog::IsWordChar(text.begin()[-1] & 0xFF); // Loop over the text, stepping the machine. for (const char* p = text.begin();; p++) { @@ -508,7 +528,15 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, flag |= kEmptyNonWordBoundary; if (Debug) { - fprintf(stderr, "%c[%#x/%d/%d]:", p > text.end() ? '$' : p == bp ? '^' : c, flag, isword, wasword); + int c = 0; + if (p == context.begin()) + c = '^'; + else if (p > text.end()) + c = '$'; + else if (p < text.end()) + c = p[0] & 0xFF; + + fprintf(stderr, "%c[%#x/%d/%d]:", c, flag, isword, wasword); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { Thread* t = i->second; if (t == NULL) @@ -518,11 +546,12 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, fprintf(stderr, "\n"); } - // Process previous character (waited until now to avoid - // repeating the flag computation above). - // This is a no-op the first time around the loop, because - // runq is empty. - int id = Step(runq, nextq, c, flag, p-1); + // Note that we pass p-1 to Step() because it needs the previous pointer + // value in order to handle Match instructions appropriately. It plumbs + // c and flag through to AddToThreadq() along with p-1+1, which is p. + // + // This is a no-op the first time around the loop because runq is empty. + int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p-1); DCHECK_EQ(runq->size(), 0); swap(nextq, runq); nextq->clear(); @@ -567,10 +596,10 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // If there's a required first byte for an unanchored search // and we're not in the middle of any possible matches, // use memchr to search for the byte quickly. - if (!anchored && first_byte_ >= 0 && runq->size() == 0 && - p < text.end() && (p[0] & 0xFF) != first_byte_) { - p = reinterpret_cast<const char*>(memchr(p, first_byte_, - text.end() - p)); + int fb = prog_->first_byte(); + if (!anchored && runq->size() == 0 && + fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) { + p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); if (p == NULL) { p = text.end(); isword = 0; @@ -580,8 +609,11 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, flag = Prog::EmptyFlags(context, p); } - match_[0] = p; - AddToThreadq(runq, start_, flag, p, match_); + Thread* t = AllocThread(); + CopyCapture(t->capture, match_); + t->capture[0] = p; + AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, flag, p, t); + Decref(t); } // If all the threads have died, stop early. @@ -591,17 +623,11 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, break; } - if (p == text.end()) - c = 0; - else - c = *p & 0xFF; wasword = isword; - - // Will run step(runq, nextq, c, ...) on next iteration. See above. } for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) - FreeThread(i->second); + Decref(i->second); if (matched_) { for (int i = 0; i < nsubmatch; i++) @@ -619,27 +645,19 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // Computes whether all successful matches have a common first byte, // and if so, returns that byte. If not, returns -1. -int NFA::ComputeFirstByte() { - if (start_ == 0) - return -1; - - int b = -1; // first byte, not yet computed - - typedef SparseSet Workq; - Workq q(prog_->size()); - q.insert(start_); - for (Workq::iterator it = q.begin(); it != q.end(); ++it) { +int Prog::ComputeFirstByte() { + int b = -1; + SparseSet q(size()); + q.insert(start()); + for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) { int id = *it; - Prog::Inst* ip = prog_->inst(id); + Prog::Inst* ip = inst(id); switch (ip->opcode()) { default: LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte"; break; case kInstMatch: - if (!ip->last()) - q.insert(id+1); - // The empty string matches: no first byte. return -1; diff --git a/re2/onepass.cc b/re2/onepass.cc index 977b0e7..1d745d4 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -392,9 +392,12 @@ bool Prog::IsOnePass() { // Flood the graph starting at the start state, and check // that in each reachable state, each possible byte leads // to a unique next state. - int size = this->size(); - InstCond *stack = new InstCond[size]; + int stacksize = inst_count(kInstCapture) + + inst_count(kInstEmptyWidth) + + inst_count(kInstNop) + 1; // + 1 for start inst + InstCond* stack = new InstCond[stacksize]; + int size = this->size(); int* nodebyid = new int[size]; // indexed by ip memset(nodebyid, 0xFF, size*sizeof nodebyid[0]); @@ -424,8 +427,10 @@ bool Prog::IsOnePass() { stack[nstack++].cond = 0; while (nstack > 0) { int id = stack[--nstack].id; - Prog::Inst* ip = inst(id); uint32 cond = stack[nstack].cond; + + Loop: + Prog::Inst* ip = inst(id); switch (ip->opcode()) { default: LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); @@ -438,26 +443,16 @@ bool Prog::IsOnePass() { // If already on work queue, (1) is violated: bail out. if (!AddQ(&workq, id+1)) goto fail; - stack[nstack].id = id+1; - stack[nstack++].cond = cond; - break; + id = id+1; + goto Loop; case kInstByteRange: { - if (!ip->last()) { - // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, id+1)) - goto fail; - stack[nstack].id = id+1; - stack[nstack++].cond = cond; - } - int nextindex = nodebyid[ip->out()]; if (nextindex == -1) { if (nalloc >= maxnodes) { if (Debug) - LOG(ERROR) - << StringPrintf("Not OnePass: hit node limit %d > %d", - nalloc, maxnodes); + LOG(ERROR) << StringPrintf( + "Not OnePass: hit node limit %d > %d", nalloc, maxnodes); goto fail; } nextindex = nalloc; @@ -466,21 +461,21 @@ bool Prog::IsOnePass() { nalloc++; AddQ(&tovisit, ip->out()); } - if (matched) - cond |= kMatchWins; for (int c = ip->lo(); c <= ip->hi(); c++) { int b = bytemap_[c]; - c = unbytemap_[b]; // last c in byte class + // Skip any bytes immediately after c that are also in b. + while (c < 256-1 && bytemap_[c+1] == b) + c++; uint32 act = node->action[b]; uint32 newact = (nextindex << kIndexShift) | cond; + if (matched) + newact |= kMatchWins; if ((act & kImpossible) == kImpossible) { node->action[b] = newact; } else if (act != newact) { if (Debug) { - LOG(ERROR) - << StringPrintf("Not OnePass: conflict on byte " - "%#x at state %d", - c, *it); + LOG(ERROR) << StringPrintf( + "Not OnePass: conflict on byte %#x at state %d", c, *it); } goto fail; } @@ -490,23 +485,32 @@ bool Prog::IsOnePass() { Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a'; for (int c = lo; c <= hi; c++) { int b = bytemap_[c]; - c = unbytemap_[b]; // last c in class + // Skip any bytes immediately after c that are also in b. + while (c < 256-1 && bytemap_[c+1] == b) + c++; uint32 act = node->action[b]; uint32 newact = (nextindex << kIndexShift) | cond; + if (matched) + newact |= kMatchWins; if ((act & kImpossible) == kImpossible) { node->action[b] = newact; } else if (act != newact) { if (Debug) { - LOG(ERROR) - << StringPrintf("Not OnePass: conflict on byte " - "%#x at state %d", - c, *it); + LOG(ERROR) << StringPrintf( + "Not OnePass: conflict on byte %#x at state %d", c, *it); } goto fail; } } } - break; + + if (ip->last()) + break; + // If already on work queue, (1) is violated: bail out. + if (!AddQ(&workq, id+1)) + goto fail; + id = id+1; + goto Loop; } case kInstCapture: @@ -534,36 +538,33 @@ bool Prog::IsOnePass() { // If already on work queue, (1) is violated: bail out. if (!AddQ(&workq, ip->out())) { if (Debug) { - LOG(ERROR) << StringPrintf("Not OnePass: multiple paths" - " %d -> %d\n", - *it, ip->out()); + LOG(ERROR) << StringPrintf( + "Not OnePass: multiple paths %d -> %d\n", *it, ip->out()); } goto fail; } - stack[nstack].id = ip->out(); - stack[nstack++].cond = cond; - break; + id = ip->out(); + goto Loop; case kInstMatch: - if (!ip->last()) { - // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, id+1)) - goto fail; - stack[nstack].id = id+1; - stack[nstack++].cond = cond; - } - if (matched) { // (3) is violated if (Debug) { - LOG(ERROR) << StringPrintf("Not OnePass: multiple matches" - " from %d\n", *it); + LOG(ERROR) << StringPrintf( + "Not OnePass: multiple matches from %d\n", *it); } goto fail; } matched = true; node->matchcond = cond; - break; + + if (ip->last()) + break; + // If already on work queue, (1) is violated: bail out. + if (!AddQ(&workq, id+1)) + goto fail; + id = id+1; + goto Loop; case kInstFail: break; @@ -572,28 +573,21 @@ bool Prog::IsOnePass() { } if (Debug) { // For debugging, dump one-pass NFA to LOG(ERROR). - string dump = "prog dump:\n" + Dump() + "node dump\n"; + LOG(ERROR) << "bytemap:\n" << DumpByteMap(); + LOG(ERROR) << "prog:\n" << Dump(); + map<int, int> idmap; for (int i = 0; i < size; i++) if (nodebyid[i] != -1) idmap[nodebyid[i]] = i; - StringAppendF(&dump, "byte ranges:\n"); - int i = 0; - for (int b = 0; b < bytemap_range_; b++) { - int lo = i; - while (bytemap_[i] == b) - i++; - StringAppendF(&dump, "\t%d: %#x-%#x\n", b, lo, i - 1); - } - + string dump; for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { int id = *it; int nodeindex = nodebyid[id]; if (nodeindex == -1) - continue; + continue; OneState* node = IndexToNode(nodes, statesize, nodeindex); - string s; StringAppendF(&dump, "node %d id=%d: matchcond=%#x\n", nodeindex, id, node->matchcond); for (int i = 0; i < bytemap_range_; i++) { @@ -605,7 +599,7 @@ bool Prog::IsOnePass() { idmap[node->action[i] >> kIndexShift]); } } - LOG(ERROR) << dump; + LOG(ERROR) << "nodes:\n" << dump; } // Overallocated earlier; cut down to actual size. diff --git a/re2/prog.cc b/re2/prog.cc index 15f10e1..9098dd4 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -99,26 +99,22 @@ Prog::Prog() start_unanchored_(0), size_(0), bytemap_range_(0), + first_byte_(-1), flags_(0), onepass_statesize_(0), inst_(NULL), dfa_first_(NULL), dfa_longest_(NULL), dfa_mem_(0), - delete_dfa_(NULL), - unbytemap_(NULL), onepass_nodes_(NULL), onepass_start_(NULL) { } Prog::~Prog() { - if (delete_dfa_ != NULL) { - delete_dfa_(&dfa_first_); - delete_dfa_(&dfa_longest_); - } + DeleteDFA(&dfa_first_); + DeleteDFA(&dfa_longest_); delete[] onepass_nodes_; delete[] inst_; - delete[] unbytemap_; } typedef SparseSet Workq; @@ -154,23 +150,12 @@ static string FlattenedProgToString(Prog* prog, int start) { } string Prog::Dump() { - string map; - if (false) { // Debugging - int lo = 0; - StringAppendF(&map, "byte map:\n"); - for (int i = 0; i < bytemap_range_; i++) { - StringAppendF(&map, "\t%d. [%02x-%02x]\n", i, lo, unbytemap_[i]); - lo = unbytemap_[i] + 1; - } - StringAppendF(&map, "\n"); - } - if (did_flatten_) - return map + FlattenedProgToString(this, start_); + return FlattenedProgToString(this, start_); Workq q(size_); AddToQueue(&q, start_); - return map + ProgToString(this, &q); + return ProgToString(this, &q); } string Prog::DumpUnanchored() { @@ -182,6 +167,26 @@ string Prog::DumpUnanchored() { return ProgToString(this, &q); } +string Prog::DumpByteMap() { + string map; + for (int c = 0; c < 256; c++) { + int b = bytemap_[c]; + int lo = c; + while (c < 256-1 && bytemap_[c+1] == b) + c++; + int hi = c; + StringAppendF(&map, "[%02x-%02x] -> %d\n", lo, hi, b); + } + return map; +} + +int Prog::first_byte() { + std::call_once(first_byte_once_, [this]() { + first_byte_ = ComputeFirstByte(); + }); + return first_byte_; +} + static bool IsMatch(Prog*, Prog::Inst*); // Peep-hole optimizer. @@ -308,46 +313,72 @@ uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) { return flags; } -void Prog::MarkByteRange(int lo, int hi) { - DCHECK_GE(lo, 0); - DCHECK_GE(hi, 0); - DCHECK_LE(lo, 255); - DCHECK_LE(hi, 255); - DCHECK_LE(lo, hi); - if (0 < lo && lo <= 255) - byterange_.Set(lo - 1); - if (0 <= hi && hi <= 255) - byterange_.Set(hi); -} - void Prog::ComputeByteMap() { - // Fill in bytemap with byte classes for prog_. - // Ranges of bytes that are treated as indistinguishable - // by the regexp program are mapped to a single byte class. - // The vector prog_->byterange() marks the end of each - // such range. - const Bitmap<256>& v = byterange(); + // Fill in byte map with byte classes for the program. + // Ranges of bytes that are treated indistinguishably + // are mapped to a single byte class. + Bitmap<256> v; + + // Don't repeat the work for \b and \B. + bool done_word_boundaries = false; + + for (int id = 0; id < static_cast<int>(size()); id++) { + Inst* ip = inst(id); + if (ip->opcode() == kInstByteRange) { + int lo = ip->lo(); + int hi = ip->hi(); + if (0 < lo) + v.Set(lo - 1); + v.Set(hi); + if (ip->foldcase() && lo <= 'z' && hi >= 'a') { + if (lo < 'a') + lo = 'a'; + if (hi > 'z') + hi = 'z'; + if (lo <= hi) { + v.Set(lo + 'A' - 'a' - 1); + v.Set(hi + 'A' - 'a'); + } + } + } else if (ip->opcode() == kInstEmptyWidth) { + if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine)) { + v.Set('\n' - 1); + v.Set('\n'); + } + if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary)) { + if (done_word_boundaries) + continue; + int j; + for (int i = 0; i < 256; i = j) { + for (j = i + 1; j < 256 && + Prog::IsWordChar(static_cast<uint8>(i)) == + Prog::IsWordChar(static_cast<uint8>(j)); + j++) + ; + if (0 < i) + v.Set(i - 1); + v.Set(j - 1); + } + done_word_boundaries = true; + } + } + } COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize); uint8 n = 0; uint32 bits = 0; for (int i = 0; i < 256; i++) { - if ((i&31) == 0) + if ((i & 31) == 0) bits = v.Word(i >> 5); bytemap_[i] = n; n += bits & 1; bits >>= 1; } bytemap_range_ = bytemap_[255] + 1; - unbytemap_ = new uint8[bytemap_range_]; - for (int i = 0; i < 256; i++) - unbytemap_[bytemap_[i]] = static_cast<uint8>(i); if (0) { // For debugging: use trivial byte map. - for (int i = 0; i < 256; i++) { + for (int i = 0; i < 256; i++) bytemap_[i] = static_cast<uint8>(i); - unbytemap_[i] = static_cast<uint8>(i); - } bytemap_range_ = 256; LOG(INFO) << "Using trivial bytemap."; } @@ -377,12 +408,12 @@ void Prog::Flatten() { for (SparseArray<int>::const_iterator i = rootmap.begin(); i != rootmap.end(); ++i) { - flatmap[i->value()] = flat.size(); + flatmap[i->value()] = static_cast<int>(flat.size()); EmitList(i->index(), &rootmap, &flat, &q, &stk); flat.back().set_last(); } - list_count_ = flatmap.size(); + list_count_ = static_cast<int>(flatmap.size()); for (int i = 0; i < kNumInst; i++) inst_count_[i] = 0; @@ -412,7 +443,7 @@ void Prog::Flatten() { } // Finally, replace the old instructions with the new instructions. - size_ = flat.size(); + size_ = static_cast<int>(flat.size()); delete[] inst_; inst_ = new Inst[size_]; memmove(inst_, flat.data(), size_ * sizeof *inst_); @@ -503,8 +534,8 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap, vector<Inst>* flat, case kInstAltMatch: flat->emplace_back(); flat->back().set_opcode(kInstAltMatch); - flat->back().set_out(flat->size()); - flat->back().out1_ = flat->size()+1; + flat->back().set_out(static_cast<int>(flat->size())); + flat->back().out1_ = static_cast<uint32>(flat->size())+1; // Fall through. case kInstAlt: @@ -218,7 +218,6 @@ class Prog { void set_reversed(bool reversed) { reversed_ = reversed; } int list_count() { return list_count_; } int inst_count(InstOp op) { return inst_count_[op]; } - const Bitmap<256>& byterange() { return byterange_; } void set_dfa_mem(int64 dfa_mem) { dfa_mem_ = dfa_mem; } int64 dfa_mem() { return dfa_mem_; } int flags() { return flags_; } @@ -230,15 +229,13 @@ class Prog { int bytemap_range() { return bytemap_range_; } const uint8* bytemap() { return bytemap_; } + // Lazily computed. + int first_byte(); + // Returns string representation of program for debugging. string Dump(); string DumpUnanchored(); - - // Record that at some point in the prog, the bytes in the range - // lo-hi (inclusive) are treated as different from bytes outside the range. - // Tracking this lets the DFA collapse commonly-treated byte ranges - // when recording state pointers, greatly reducing its memory footprint. - void MarkByteRange(int lo, int hi); + string DumpByteMap(); // Returns the set of kEmpty flags that are in effect at // position p within context. @@ -297,6 +294,10 @@ class Prog { // Compute byte map. void ComputeByteMap(); + // Computes whether all matches must begin with the same first + // byte, and if so, returns that byte. If not, returns -1. + int ComputeFirstByte(); + // Run peep-hole optimizer on program. void Optimize(); @@ -370,6 +371,7 @@ class Prog { friend class Compiler; DFA* GetDFA(MatchKind kind); + void DeleteDFA(std::atomic<DFA*>* pdfa); bool anchor_start_; // regexp has explicit start anchor bool anchor_end_; // regexp has explicit end anchor @@ -381,6 +383,7 @@ class Prog { int start_unanchored_; // unanchored entry point for program int size_; // number of instructions int bytemap_range_; // bytemap_[x] < bytemap_range_ + int first_byte_; // required first byte for match, or -1 if none int flags_; // regexp parse flags int onepass_statesize_; // byte size of each OneState* node @@ -393,16 +396,14 @@ class Prog { std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch and kFullMatch int64 dfa_mem_; // Maximum memory for DFAs. - void (*delete_dfa_)(std::atomic<DFA*>* pdfa); - Bitmap<256> byterange_; // byterange.Get(x) true if x ends a - // commonly-treated byte range. uint8 bytemap_[256]; // map from input bytes to byte classes - uint8 *unbytemap_; // bytemap_[unbytemap_[x]] == x uint8* onepass_nodes_; // data for OnePass nodes OneState* onepass_start_; // start node for OnePass program + std::once_flag first_byte_once_; + DISALLOW_COPY_AND_ASSIGN(Prog); }; @@ -26,11 +26,6 @@ namespace re2 { static const int kMaxArgs = 16; static const int kVecSize = 1+kMaxArgs; -const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::FullMatchN> RE2::FullMatch = {}; -const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::PartialMatchN> RE2::PartialMatch = {}; -const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {}; -const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {}; - // This will trigger LNK2005 error in MSVC. #ifndef _MSC_VER const int RE2::Options::kDefaultMaxMem; // initialized in re2.h @@ -184,7 +184,6 @@ #include <mutex> #include <string> #include "re2/stringpiece.h" -#include "re2/variadic_function.h" #ifndef RE2_HAVE_LONGLONG #define RE2_HAVE_LONGLONG 1 @@ -251,7 +250,7 @@ class RE2 { RE2(const string& pattern); #endif RE2(const StringPiece& pattern); - RE2(const StringPiece& pattern, const Options& option); + RE2(const StringPiece& pattern, const Options& options); ~RE2(); // Returns whether RE2 was created properly. @@ -290,11 +289,11 @@ class RE2 { /***** The useful part: the matching interface *****/ - // Matches "text" against "pattern". If pointer arguments are + // Matches "text" against "re". If pointer arguments are // supplied, copies matched sub-patterns into them. // // You can pass in a "const char*" or a "string" for "text". - // You can pass in a "const char*" or a "string" or a "RE2" for "pattern". + // You can pass in a "const char*" or a "string" or a "RE2" for "re". // // The provided pointer arguments can be pointers to any scalar numeric // type, or one of: @@ -304,7 +303,7 @@ class RE2 { // (void*)NULL (the corresponding matched sub-pattern is not copied) // // Returns true iff all of the following conditions are satisfied: - // a. "text" matches "pattern" exactly + // a. "text" matches "re" exactly // b. The number of matched sub-patterns is >= number of supplied pointers // c. The "i"th argument has a suitable type for holding the // string captured as the "i"th sub-pattern. If you pass in @@ -320,32 +319,60 @@ class RE2 { // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); static bool FullMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int argc); - static const VariadicFunction2< - bool, const StringPiece&, const RE2&, Arg, RE2::FullMatchN> FullMatch; - // Exactly like FullMatch(), except that "pattern" is allowed to match + // Exactly like FullMatch(), except that "re" is allowed to match // a substring of "text". - static bool PartialMatchN(const StringPiece& text, const RE2& re, // 3..16 args + static bool PartialMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int argc); - static const VariadicFunction2< - bool, const StringPiece&, const RE2&, Arg, RE2::PartialMatchN> PartialMatch; - // Like FullMatch() and PartialMatch(), except that pattern has to - // match a prefix of "text", and "input" is advanced past the matched + // Like FullMatch() and PartialMatch(), except that "re" has to match + // a prefix of the text, and "input" is advanced past the matched // text. Note: "input" is modified iff this routine returns true. - static bool ConsumeN(StringPiece* input, const RE2& pattern, // 3..16 args + static bool ConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int argc); - static const VariadicFunction2< - bool, StringPiece*, const RE2&, Arg, RE2::ConsumeN> Consume; - - // Like Consume(..), but does not anchor the match at the beginning of the - // string. That is, "pattern" need not start its match at the beginning of - // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next - // word in "s" and stores it in "word". - static bool FindAndConsumeN(StringPiece* input, const RE2& pattern, - const Arg* const args[], int argc); - static const VariadicFunction2< - bool, StringPiece*, const RE2&, Arg, RE2::FindAndConsumeN> FindAndConsume; + + // Like Consume(), but does not anchor the match at the beginning of + // the text. That is, "re" need not start its match at the beginning + // of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds + // the next word in "s" and stores it in "word". + static bool FindAndConsumeN(StringPiece* input, const RE2& re, + const Arg* const args[], int argc); + +#ifndef SWIG + private: + template <typename F, typename SP, typename... A> + static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) { + const Arg* const args[] = {&a...}; + const int argc = sizeof...(a); + return f(sp, re, args, argc); + } + + public: + // In order to allow FullMatch() et al. to be called with a varying number + // of arguments of varying types, we use two layers of variadic templates. + // The first layer constructs the temporary Arg objects. The second layer + // (above) constructs the array of pointers to the temporary Arg objects. + + template <typename... A> + static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) { + return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...); + } + + template <typename... A> + static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) { + return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...); + } + + template <typename... A> + static bool Consume(StringPiece* input, const RE2& re, A&&... a) { + return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...); + } + + template <typename... A> + static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) { + return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...); + } +#endif // Replace the first match of "pattern" in "str" with "rewrite". // Within "rewrite", backslash-escaped digits (\1 to \9) can be @@ -623,19 +650,7 @@ class RE2 { void set_one_line(bool b) { one_line_ = b; } void Copy(const Options& src) { - encoding_ = src.encoding_; - posix_syntax_ = src.posix_syntax_; - longest_match_ = src.longest_match_; - log_errors_ = src.log_errors_; - max_mem_ = src.max_mem_; - literal_ = src.literal_; - never_nl_ = src.never_nl_; - dot_nl_ = src.dot_nl_; - never_capture_ = src.never_capture_; - case_sensitive_ = src.case_sensitive_; - perl_classes_ = src.perl_classes_; - word_boundary_ = src.word_boundary_; - one_line_ = src.one_line_; + *this = src; } int ParseFlags() const; @@ -654,10 +669,6 @@ class RE2 { bool perl_classes_; bool word_boundary_; bool one_line_; - - //DISALLOW_COPY_AND_ASSIGN(Options); - Options(const Options&); - void operator=(const Options&); }; // Returns the options set in the constructor. diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc index 05e7ea8..0c598e0 100644 --- a/re2/testing/compile_test.cc +++ b/re2/testing/compile_test.cc @@ -143,6 +143,29 @@ TEST(TestRegexpCompileToProg, Simple) { EXPECT_EQ(failed, 0); } +// The distinct byte ranges involved in the Latin-1 dot ([^\n]). +static struct Latin1ByteRange { + int lo; + int hi; +} latin1ranges[] = { + { 0x00, 0x09 }, + { 0x0A, 0x0A }, + { 0x0B, 0xFF }, +}; + +TEST(TestCompile, Latin1Ranges) { + Regexp* re = Regexp::Parse(".", Regexp::PerlX|Regexp::Latin1, NULL); + EXPECT_TRUE(re != NULL); + Prog* prog = re->CompileToProg(0); + EXPECT_TRUE(prog != NULL); + EXPECT_EQ(prog->bytemap_range(), arraysize(latin1ranges)); + for (int i = 0; i < arraysize(latin1ranges); i++) + for (int j = latin1ranges[i].lo; j <= latin1ranges[i].hi; j++) + EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j; + delete prog; + re->Decref(); +} + // The distinct byte ranges involved in the UTF-8 dot ([^\n]). // Once, erroneously split between 0x3f and 0x40 because it is // a 6-bit boundary. @@ -152,7 +175,7 @@ static struct UTF8ByteRange { } utf8ranges[] = { { 0x00, 0x09 }, { 0x0A, 0x0A }, - { 0x10, 0x7F }, + { 0x0B, 0x7F }, { 0x80, 0x8F }, { 0x90, 0x9F }, { 0xA0, 0xBF }, @@ -166,7 +189,7 @@ static struct UTF8ByteRange { { 0xF5, 0xFF }, }; -TEST(TestCompile, ByteRanges) { +TEST(TestCompile, UTF8Ranges) { Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL); EXPECT_TRUE(re != NULL); Prog* prog = re->CompileToProg(0); diff --git a/re2/variadic_function.h b/re2/variadic_function.h deleted file mode 100644 index 7c7d6d5..0000000 --- a/re2/variadic_function.h +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright 2010 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef RE2_VARIADIC_FUNCTION_H_ -#define RE2_VARIADIC_FUNCTION_H_ - -namespace re2 { - -template <typename Result, typename Param0, typename Param1, typename Arg, - Result (*Func)(Param0, Param1, const Arg* const [], int count)> -class VariadicFunction2 { - public: - Result operator()(Param0 p0, Param1 p1) const { - return Func(p0, p1, 0, 0); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0) const { - const Arg* const args[] = { &a0 }; - return Func(p0, p1, args, 1); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1) const { - const Arg* const args[] = { &a0, &a1 }; - return Func(p0, p1, args, 2); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2) const { - const Arg* const args[] = { &a0, &a1, &a2 }; - return Func(p0, p1, args, 3); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3 }; - return Func(p0, p1, args, 4); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4 }; - return Func(p0, p1, args, 5); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5 }; - return Func(p0, p1, args, 6); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6 }; - return Func(p0, p1, args, 7); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7 }; - return Func(p0, p1, args, 8); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8 }; - return Func(p0, p1, args, 9); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9 }; - return Func(p0, p1, args, 10); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10 }; - return Func(p0, p1, args, 11); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11 }; - return Func(p0, p1, args, 12); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12 }; - return Func(p0, p1, args, 13); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13 }; - return Func(p0, p1, args, 14); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14 }; - return Func(p0, p1, args, 15); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15 }; - return Func(p0, p1, args, 16); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16 }; - return Func(p0, p1, args, 17); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17 }; - return Func(p0, p1, args, 18); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18 }; - return Func(p0, p1, args, 19); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19 }; - return Func(p0, p1, args, 20); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, - &a20 }; - return Func(p0, p1, args, 21); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21 }; - return Func(p0, p1, args, 22); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22 }; - return Func(p0, p1, args, 23); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23 }; - return Func(p0, p1, args, 24); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24 }; - return Func(p0, p1, args, 25); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25 }; - return Func(p0, p1, args, 26); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, - const Arg& a26) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25, &a26 }; - return Func(p0, p1, args, 27); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, - const Arg& a26, const Arg& a27) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25, &a26, &a27 }; - return Func(p0, p1, args, 28); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, - const Arg& a26, const Arg& a27, const Arg& a28) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28 }; - return Func(p0, p1, args, 29); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, - const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29 }; - return Func(p0, p1, args, 30); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, - const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29, - const Arg& a30) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30 }; - return Func(p0, p1, args, 31); - } - - Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, - const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, - const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, - const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, - const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, - const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, - const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, - const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29, - const Arg& a30, const Arg& a31) const { - const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, - &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, - &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30, &a31 }; - return Func(p0, p1, args, 32); - } -}; - -} // namespace re2 - -#endif // RE2_VARIADIC_FUNCTION_H_ diff --git a/util/fuzz.cc b/util/fuzz.cc new file mode 100644 index 0000000..9cac118 --- /dev/null +++ b/util/fuzz.cc @@ -0,0 +1,21 @@ +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +// Entry point for libFuzzer. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); + +int main(int argc, char** argv) { + uint8_t data[32]; + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 32; j++) { + data[j] = random() & 0xFF; + } + LLVMFuzzerTestOneInput(data, 32); + } + return 0; +} diff --git a/util/test.cc b/util/test.cc index b0167e7..0a751fe 100644 --- a/util/test.cc +++ b/util/test.cc @@ -23,7 +23,7 @@ void RegisterTest(void (*fn)(void), const char *name) { tests[ntests++].name = name; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { for (int i = 0; i < ntests; i++) { printf("%s\n", tests[i].name); tests[i].fn(); diff --git a/util/util.h b/util/util.h index 7dfab93..1e78aa3 100644 --- a/util/util.h +++ b/util/util.h @@ -32,6 +32,7 @@ #include <set> #include <atomic> #include <mutex> // For std::call_once +#include <unordered_set> // Use std names. using std::set; @@ -46,22 +47,7 @@ using std::stack; using std::sort; using std::swap; using std::make_pair; - -#if defined(__GNUC__) && !defined(USE_CXX0X) && !defined(_LIBCPP_ABI_VERSION) - -#include <tr1/unordered_set> -using std::tr1::unordered_set; - -#else - -#include <unordered_set> -#if defined(_WIN32) -using std::tr1::unordered_set; -#else using std::unordered_set; -#endif - -#endif #ifdef _WIN32 |