Imported Upstream version 20160401upstream/20160401

Change-Id: I05a22e9add0c0dc9c7cc5d28b8ceedd442fca31c Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
author: DongHun Kwak <dh0128.kwak@samsung.com> 2016-11-21 16:57:37 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2016-11-21 16:57:37 +0900
commit: 6c0bc52e1c8854d1a96810f0fe9849d83a7c409c (patch)
tree: fc11f96d6acda48ef0b297817c5f6110aa81351b
parent: 4dfe393027cfcfe844a8c8b97209a714904308c4 (diff)
download: re2-6c0bc52e1c8854d1a96810f0fe9849d83a7c409c.tar.gz
re2-6c0bc52e1c8854d1a96810f0fe9849d83a7c409c.tar.bz2
re2-6c0bc52e1c8854d1a96810f0fe9849d83a7c409c.zip
26 files changed, 758 insertions, 392 deletions
diff --git a/BUILD b/BUILD
index 2636631..1e2d19c 100644
--- a/BUILD
+++ b/BUILD
@@ -6,6 +6,8 @@
 
 licenses(["notice"])
 
+exports_files(["LICENSE"])
+
 cc_library(
     name = "re2",
     srcs = [
@@ -97,25 +99,61 @@ cc_library(
 load("re2_test", "re2_test")
 
 re2_test("charclass_test")
+
 re2_test("compile_test")
+
 re2_test("filtered_re2_test")
+
 re2_test("mimics_pcre_test")
+
 re2_test("parse_test")
+
 re2_test("possible_match_test")
+
 re2_test("re2_arg_test")
+
 re2_test("re2_test")
+
 re2_test("regexp_test")
+
 re2_test("required_prefix_test")
+
 re2_test("search_test")
+
 re2_test("set_test")
+
 re2_test("simplify_test")
+
 re2_test("string_generator_test")
 
-re2_test("dfa_test", size="large")
-re2_test("exhaustive1_test", size="large")
-re2_test("exhaustive2_test", size="large")
-re2_test("exhaustive3_test", size="large")
-re2_test("exhaustive_test", size="large")
-re2_test("random_test", size="large")
+re2_test(
+    "dfa_test",
+    size = "large",
+)
+
+re2_test(
+    "exhaustive1_test",
+    size = "large",
+)
+
+re2_test(
+    "exhaustive2_test",
+    size = "large",
+)
+
+re2_test(
+    "exhaustive3_test",
+    size = "large",
+)
+
+re2_test(
+    "exhaustive_test",
+    size = "large",
+)
+
+re2_test(
+    "random_test",
+    size = "large",
+)
 
 # TODO: Add support for regexp_benchmark.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3ed718f..66cfdc6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
   if(MSVC_VERSION LESS 1800)
     message(FATAL_ERROR "you need Visual Studio 2013 or later")
   endif()
+  if(BUILD_SHARED_LIBS)
+    message(FATAL_ERROR "building a DLL is not currently supported")
+  endif()
 elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
   add_compile_options(-std=c++11)
 endif()
diff --git a/Makefile b/Makefile
index 92fbfc3..4479033 100644
--- a/Makefile
+++ b/Makefile
@@ -44,7 +44,7 @@ ifeq ($(shell uname),Darwin)
 SOEXT=dylib
 SOEXTVER=$(SONAME).$(SOEXT)
 SOEXTVER00=$(SONAME).0.0.$(SOEXT)
-MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib $(LDFLAGS) -Wl,-install_name,@rpath/libre2.$(SOEXTVER) -exported_symbols_list libre2.symbols.darwin
+MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-install_name,@rpath/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(LDFLAGS)
 else ifeq ($(shell uname),SunOS)
 SOEXT=so
 SOEXTVER=$(SOEXT).$(SONAME)
@@ -275,11 +275,15 @@ install: obj/libre2.a obj/so/libre2.$(SOEXT)
 testinstall:
 	@mkdir -p obj
 	cp testinstall.cc obj
-ifneq ($(shell uname),Darwin)
-	(cd obj && $(CXX) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -pthread -static -o testinstall)
+ifeq ($(shell uname),Darwin)
+	@echo Skipping test for libre2.a on Darwin.
+else ifeq ($(shell uname),SunOS)
+	@echo Skipping test for libre2.a on SunOS.
+else
+	(cd obj && $(CXX) $(CXXFLAGS) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -l:libre2.a -o testinstall)
 	obj/testinstall
 endif
-	(cd obj && $(CXX) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -pthread -o testinstall)
+	(cd obj && $(CXX) $(CXXFLAGS) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -o testinstall)
 	LD_LIBRARY_PATH=$(DESTDIR)$(libdir) obj/testinstall
 
 benchlog: obj/test/regexp_benchmark
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 0cfa480..661a8ca 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -141,6 +141,7 @@ void BitState::Push(int id, const char* p, int arg) {
 // Return whether it succeeded.
 bool BitState::TrySearch(int id0, const char* p0) {
   bool matched = false;
+  bool inaltmatch = false;
   const char* end = text_.end();
   njob_ = 0;
   Push(id0, p0, 0);
@@ -159,6 +160,14 @@ bool BitState::TrySearch(int id0, const char* p0) {
     // would have, but we avoid the stack
     // manipulation.
     if (0) {
+    Next:
+      // If the Match of a non-greedy AltMatch failed,
+      // we stop ourselves from trying the ByteRange,
+      // which would steer us off the short circuit.
+      if (prog_->inst(id)->last() || inaltmatch)
+        continue;
+      id++;
+
     CheckAndLoop:
       if (!ShouldVisit(id, p))
         continue;
@@ -176,66 +185,63 @@ bool BitState::TrySearch(int id0, const char* p0) {
       case kInstFail:
         continue;
 
-      case kInstAlt:
-        // Cannot just
-        //   Push(ip->out1(), p, 0);
-        //   Push(ip->out(), p, 0);
-        // If, during the processing of ip->out(), we encounter
-        // ip->out1() via another path, we want to process it then.
-        // Pushing it here will inhibit that.  Instead, re-push
-        // ip with arg==1 as a reminder to push ip->out1() later.
+      case kInstAltMatch:
         switch (arg) {
           case 0:
+            inaltmatch = true;
             Push(id, p, 1);  // come back when we're done
+
+            // One opcode is ByteRange; the other leads to Match
+            // (possibly via Nop or Capture).
+            if (ip->greedy(prog_)) {
+              // out1 is the match
+              Push(ip->out1(), p, 0);
+              id = ip->out1();
+              p = end;
+              goto CheckAndLoop;
+            }
+            // out is the match - non-greedy
+            Push(ip->out(), end, 0);
             id = ip->out();
             goto CheckAndLoop;
 
           case 1:
-            // Finished ip->out(); try ip->out1().
-            arg = 0;
-            id = ip->out1();
-            goto CheckAndLoop;
+            inaltmatch = false;
+            continue;
         }
-        LOG(DFATAL) << "Bad arg in kInstAlt: " << arg;
+        LOG(DFATAL) << "Bad arg in kInstAltMatch: " << arg;
         continue;
 
-      case kInstAltMatch:
-        // One opcode is byte range; the other leads to match.
-        if (ip->greedy(prog_)) {
-          // out1 is the match
-          Push(ip->out1(), p, 0);
-          id = ip->out1();
-          p = end;
-          goto CheckAndLoop;
-        }
-        // out is the match - non-greedy
-        Push(ip->out(), end, 0);
-        id = ip->out();
-        goto CheckAndLoop;
-
       case kInstByteRange: {
         int c = -1;
         if (p < end)
           c = *p & 0xFF;
-        if (ip->Matches(c)) {
-          id = ip->out();
-          p++;
-          goto CheckAndLoop;
-        }
-        continue;
+        if (!ip->Matches(c))
+          goto Next;
+
+        if (!ip->last())
+          Push(id+1, p, 0);  // try the next when we're done
+        id = ip->out();
+        p++;
+        goto CheckAndLoop;
       }
 
       case kInstCapture:
         switch (arg) {
           case 0:
+            if (!ip->last())
+              Push(id+1, p, 0);  // try the next when we're done
+
             if (0 <= ip->cap() && ip->cap() < ncap_) {
               // Capture p to register, but save old value.
               Push(id, cap_[ip->cap()], 1);  // come back when we're done
               cap_[ip->cap()] = p;
             }
+
             // Continue on.
             id = ip->out();
             goto CheckAndLoop;
+
           case 1:
             // Finished ip->out(); restore the old value.
             cap_[ip->cap()] = p;
@@ -246,17 +252,22 @@ bool BitState::TrySearch(int id0, const char* p0) {
 
       case kInstEmptyWidth:
         if (ip->empty() & ~Prog::EmptyFlags(context_, p))
-          continue;
+          goto Next;
+
+        if (!ip->last())
+          Push(id+1, p, 0);  // try the next when we're done
         id = ip->out();
         goto CheckAndLoop;
 
       case kInstNop:
+        if (!ip->last())
+          Push(id+1, p, 0);  // try the next when we're done
         id = ip->out();
         goto CheckAndLoop;
 
       case kInstMatch: {
         if (endmatch_ && p != text_.end())
-          continue;
+          goto Next;
 
         // VLOG(0) << "Found match.";
         // We found a match.  If the caller doesn't care
@@ -285,7 +296,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
           return true;
 
         // Otherwise, continue on in hope of a longer match.
-        continue;
+        goto Next;
       }
     }
   }
diff --git a/re2/compile.cc b/re2/compile.cc
index 9882fef..df859ab 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -541,6 +541,9 @@ bool Compiler::IsCachedRuneByteSuffix(int id) {
 }
 
 void Compiler::AddSuffix(int id) {
+  if (failed_)
+    return;
+
   if (rune_range_.begin == 0) {
     rune_range_.begin = id;
     return;
@@ -1234,6 +1237,7 @@ Prog* Compiler::Finish() {
   prog_->ComputeByteMap();
 
   prog_->Optimize();
+  prog_->Flatten();
 
   // Record remaining memory for DFA.
   if (max_mem_ <= 0) {
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 7c526c8..ccb6065 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -216,7 +216,7 @@ class DFA {
 
   // Converts a State into a Workq: the opposite of WorkqToCachedState.
   // L >= mutex_
-  static void StateToWorkq(State* s, Workq* q);
+  void StateToWorkq(State* s, Workq* q);
 
   // Runs a State on a given byte, returning the next state.
   State* RunStateOnByteUnlocked(State*, int);  // cache_mutex_.r <= L < mutex_
@@ -640,31 +640,17 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) {
           return FullMatchState;
         }
         // Fall through.
-      case kInstByteRange:    // These are useful.
-      case kInstEmptyWidth:
-      case kInstMatch:
-      case kInstAlt:          // Not useful, but necessary [*]
-        inst[n++] = *it;
+      default:
+        // Record iff id is the head of its list, which must
+        // be the case if id-1 is the last of *its* list. :)
+        if (prog_->inst(id-1)->last())
+          inst[n++] = *it;
         if (ip->opcode() == kInstEmptyWidth)
           needflags |= ip->empty();
         if (ip->opcode() == kInstMatch && !prog_->anchor_end())
           sawmatch = true;
         break;
-
-      default:                // The rest are not.
-        break;
     }
-
-    // [*] kInstAlt would seem useless to record in a state, since
-    // we've already followed both its arrows and saved all the
-    // interesting states we can reach from there.  The problem
-    // is that one of the empty-width instructions might lead
-    // back to the same kInstAlt (if an empty-width operator is starred),
-    // producing a different evaluation order depending on whether
-    // we keep the kInstAlt to begin with.  Sigh.
-    // A specific case that this affects is /(^|a)+/ matching "a".
-    // If we don't save the kInstAlt, we will match the whole "a" (0,1)
-    // but in fact the correct leftmost-first match is the leading "" (0,0).
   }
   DCHECK_LE(n, q->size());
   if (n > 0 && inst[n-1] == Mark)
@@ -797,12 +783,12 @@ void DFA::StateToWorkq(State* s, Workq* q) {
     if (s->inst_[i] == Mark)
       q->mark();
     else
-      q->insert_new(s->inst_[i]);
+      // Explore from the head of the list.
+      AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask);
   }
 }
 
-// Adds ip to the work queue, following empty arrows according to flag
-// and expanding kInstAlt instructions (two-target gotos).
+// Adds ip to the work queue, following empty arrows according to flag.
 void DFA::AddToQueue(Workq* q, int id, uint flag) {
 
   // Use astack_ to hold our stack of states yet to process.
@@ -828,9 +814,8 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) {
       continue;
 
     // If ip is already on the queue, nothing to do.
-    // Otherwise add it.  We don't actually keep all the ones
-    // that get added -- for example, kInstAlt is ignored
-    // when on a work queue -- but adding all ip's here
+    // Otherwise add it.  We don't actually keep all the
+    // ones that get added, but adding all of them here
     // increases the likelihood of q->contains(id),
     // reducing the amount of duplicated work.
     if (q->contains(id))
@@ -840,34 +825,40 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) {
     // Process instruction.
     Prog::Inst* ip = prog_->inst(id);
     switch (ip->opcode()) {
-      case kInstFail:       // can't happen: discarded above
+      default:
+        LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
         break;
 
       case kInstByteRange:  // just save these on the queue
       case kInstMatch:
+        if (!ip->last())
+          stk[nstk++] = id+1;
         break;
 
       case kInstCapture:    // DFA treats captures as no-ops.
       case kInstNop:
+        if (!ip->last())
+          stk[nstk++] = id+1;
+
+        // If this instruction is the [00-FF]* loop at the beginning of
+        // a leftmost-longest unanchored search, separate with a Mark so
+        // that future threads (which will start farther to the right in
+        // the input string) are lower priority than current threads.
+        if (ip->opcode() == kInstNop && q->maxmark() > 0 &&
+            id == prog_->start_unanchored() && id != prog_->start())
+          stk[nstk++] = Mark;
         stk[nstk++] = ip->out();
         break;
 
-      case kInstAlt:        // two choices: expand both, in order
       case kInstAltMatch:
-        // Want to visit out then out1, so push on stack in reverse order.
-        // This instruction is the [00-FF]* loop at the beginning of
-        // a leftmost-longest unanchored search, separate out from out1
-        // with a Mark, so that out1's threads (which will start farther
-        // to the right in the string being searched) are lower priority
-        // than the current ones.
-        stk[nstk++] = ip->out1();
-        if (q->maxmark() > 0 &&
-            id == prog_->start_unanchored() && id != prog_->start())
-          stk[nstk++] = Mark;
-        stk[nstk++] = ip->out();
+        DCHECK(!ip->last());
+        stk[nstk++] = id+1;
         break;
 
       case kInstEmptyWidth:
+        if (!ip->last())
+          stk[nstk++] = id+1;
+
         // Continue on if we have all the right flag bits.
         if (ip->empty() & ~flag)
           break;
@@ -924,10 +915,13 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
     int id = *i;
     Prog::Inst* ip = prog_->inst(id);
     switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+        break;
+
       case kInstFail:        // never succeeds
       case kInstCapture:     // already followed
       case kInstNop:         // already followed
-      case kInstAlt:         // already followed
       case kInstAltMatch:    // already followed
       case kInstEmptyWidth:  // already followed
         break;
@@ -1499,8 +1493,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
       v->clear();
       for (int i = 0; i < s->ninst_; i++) {
         Prog::Inst* ip = prog_->inst(s->inst_[i]);
-        if (ip->opcode() == kInstMatch)
-          v->push_back(ip->match_id());
+        for (;;) {
+          if (ip->opcode() == kInstMatch)
+            v->push_back(ip->match_id());
+          if (ip->last())
+            break;
+          ip++;
+        }
       }
     }
     if (DebugDFA)
diff --git a/re2/nfa.cc b/re2/nfa.cc
index bc8996c..5232e5f 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -230,25 +230,28 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag,
       break;
 
     case kInstAltMatch:
+      DCHECK(!ip->last());
+      stk[nstk++] = AddState(id+1);
+
       // Save state; will pick up at next byte.
       t = AllocThread();
       t->id = id;
       CopyCapture(t->capture, capture);
       *tp = t;
-      // fall through
-
-    case kInstAlt:
-      // Explore alternatives.
-      stk[nstk++] = AddState(ip->out1());
-      stk[nstk++] = AddState(ip->out());
       break;
 
     case kInstNop:
+      if (!ip->last())
+        stk[nstk++] = AddState(id+1);
+
       // Continue on.
       stk[nstk++] = AddState(ip->out());
       break;
 
     case kInstCapture:
+      if (!ip->last())
+        stk[nstk++] = AddState(id+1);
+
       if ((j=ip->cap()) < ncapture_) {
         // Push a dummy whose only job is to restore capture[j]
         // once we finish exploring this possibility.
@@ -262,6 +265,9 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag,
 
     case kInstMatch:
     case kInstByteRange:
+      if (!ip->last())
+        stk[nstk++] = AddState(id+1);
+
       // Save state; will pick up at next byte.
       t = AllocThread();
       t->id = id;
@@ -272,6 +278,9 @@ void NFA::AddToThreadq(Threadq* q, int id0, int flag,
       break;
 
     case kInstEmptyWidth:
+      if (!ip->last())
+        stk[nstk++] = AddState(id+1);
+
       // Continue on if we have all the right flag bits.
       if (ip->empty() & ~flag)
         break;
@@ -542,14 +551,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
             match_[1] = p;
             matched_ = true;
             break;
-
-          case kInstEmptyWidth:
-            if (ip->empty() & ~(kEmptyEndLine|kEmptyEndText)) {
-              LOG(DFATAL) << "Unexpected empty-width in short circuit: " << ip->empty();
-              break;
-            }
-            id = ip->out();
-            continue;
         }
         break;
       }
@@ -640,10 +641,16 @@ int NFA::ComputeFirstByte() {
         break;
 
       case kInstMatch:
+        if (!ip->last())
+          q.insert(id+1);
+
         // The empty string matches: no first byte.
         return -1;
 
       case kInstByteRange:
+        if (!ip->last())
+          q.insert(id+1);
+
         // Must match only a single byte
         if (ip->lo() != ip->hi())
           return -1;
@@ -660,6 +667,9 @@ int NFA::ComputeFirstByte() {
       case kInstNop:
       case kInstCapture:
       case kInstEmptyWidth:
+        if (!ip->last())
+          q.insert(id+1);
+
         // Continue on.
         // Ignore ip->empty() flags for kInstEmptyWidth
         // in order to be as conservative as possible
@@ -668,13 +678,9 @@ int NFA::ComputeFirstByte() {
           q.insert(ip->out());
         break;
 
-      case kInstAlt:
       case kInstAltMatch:
-        // Explore alternatives.
-        if (ip->out())
-          q.insert(ip->out());
-        if (ip->out1())
-          q.insert(ip->out1());
+        DCHECK(!ip->last());
+        q.insert(id+1);
         break;
 
       case kInstFail:
@@ -723,31 +729,42 @@ void Prog::Fanout(SparseArray<int>* fanout) {
     reachable.clear();
     reachable.insert(i->index());
     for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
-      Prog::Inst* ip = inst(*j);
+      int id = *j;
+      Prog::Inst* ip = inst(id);
       switch (ip->opcode()) {
         default:
           LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
           break;
 
         case kInstByteRange:
+          if (!ip->last())
+            reachable.insert(id+1);
+
           (*count)++;
           if (!fanout->has_index(ip->out())) {
             fanout->set_new(ip->out(), 0);
           }
           break;
 
-        case kInstAlt:
         case kInstAltMatch:
-          reachable.insert(ip->out1());
-          // fall through
+          DCHECK(!ip->last());
+          reachable.insert(id+1);
+          break;
 
         case kInstCapture:
         case kInstEmptyWidth:
         case kInstNop:
+          if (!ip->last())
+            reachable.insert(id+1);
+
           reachable.insert(ip->out());
           break;
 
         case kInstMatch:
+          if (!ip->last())
+            reachable.insert(id+1);
+          break;
+
         case kInstFail:
           break;
       }
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 73acdc8..977ed96 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -427,21 +427,30 @@ bool Prog::IsOnePass() {
       Prog::Inst* ip = inst(id);
       uint32 cond = stack[nstack].cond;
       switch (ip->opcode()) {
+        default:
+          LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+          break;
+
         case kInstAltMatch:
           // TODO(rsc): Ignoring kInstAltMatch optimization.
           // Should implement it in this engine, but it's subtle.
-          // Fall through.
-        case kInstAlt:
+          DCHECK(!ip->last());
           // If already on work queue, (1) is violated: bail out.
-          if (!AddQ(&workq, ip->out()) || !AddQ(&workq, ip->out1()))
+          if (!AddQ(&workq, id+1))
             goto fail;
-          stack[nstack].id = ip->out1();
-          stack[nstack++].cond = cond;
-          stack[nstack].id = ip->out();
+          stack[nstack].id = id+1;
           stack[nstack++].cond = cond;
           break;
 
         case kInstByteRange: {
+          if (!ip->last()) {
+            // If already on work queue, (1) is violated: bail out.
+            if (!AddQ(&workq, id+1))
+              goto fail;
+            stack[nstack].id = id+1;
+            stack[nstack++].cond = cond;
+          }
+
           int nextindex = nodebyid[ip->out()];
           if (nextindex == -1) {
             if (nalloc >= maxnodes) {
@@ -501,16 +510,21 @@ bool Prog::IsOnePass() {
         }
 
         case kInstCapture:
-          if (ip->cap() < kMaxCap)
-            cond |= (1 << kCapShift) << ip->cap();
-          goto QueueEmpty;
-
         case kInstEmptyWidth:
-          cond |= ip->empty();
-          goto QueueEmpty;
-
         case kInstNop:
-        QueueEmpty:
+          if (!ip->last()) {
+            // If already on work queue, (1) is violated: bail out.
+            if (!AddQ(&workq, id+1))
+              goto fail;
+            stack[nstack].id = id+1;
+            stack[nstack++].cond = cond;
+          }
+
+          if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap)
+            cond |= (1 << kCapShift) << ip->cap();
+          if (ip->opcode() == kInstEmptyWidth)
+            cond |= ip->empty();
+
           // kInstCapture and kInstNop always proceed to ip->out().
           // kInstEmptyWidth only sometimes proceeds to ip->out(),
           // but as a conservative approximation we assume it always does.
@@ -531,6 +545,14 @@ bool Prog::IsOnePass() {
           break;
 
         case kInstMatch:
+          if (!ip->last()) {
+            // If already on work queue, (1) is violated: bail out.
+            if (!AddQ(&workq, id+1))
+              goto fail;
+            stack[nstack].id = id+1;
+            stack[nstack++].cond = cond;
+          }
+
           if (matched) {
             // (3) is violated
             if (Debug) {
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index 45e43c9..f171eec 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -15,8 +15,6 @@ static const int Trace = false;
 typedef set<string>::iterator SSIter;
 typedef set<string>::const_iterator ConstSSIter;
 
-GLOBAL_MUTEX(alloc_id_mutex);
-static int alloc_id = 100000;  // Used for debugging.
 // Initializes a Prefilter, allocating subs_ as necessary.
 Prefilter::Prefilter(Op op) {
   op_ = op;
@@ -24,15 +22,12 @@ Prefilter::Prefilter(Op op) {
   if (op_ == AND || op_ == OR)
     subs_ = new vector<Prefilter*>;
 
-  GLOBAL_MUTEX_LOCK(alloc_id_mutex);
-  alloc_id_ = alloc_id++;
-  GLOBAL_MUTEX_UNLOCK(alloc_id_mutex);
-  VLOG(10) << "alloc_id: " << alloc_id_;
+  VLOG(10) << "constructed: " << reinterpret_cast<intptr_t>(this);
 }
 
 // Destroys a Prefilter.
 Prefilter::~Prefilter() {
-  VLOG(10) << "Deleted: " << alloc_id_;
+  VLOG(10) << "destructing: " << reinterpret_cast<intptr_t>(this);
   if (subs_) {
     for (size_t i = 0; i < subs_->size(); i++)
       delete (*subs_)[i];
diff --git a/re2/prefilter.h b/re2/prefilter.h
index 2bc1676..531c189 100644
--- a/re2/prefilter.h
+++ b/re2/prefilter.h
@@ -94,9 +94,6 @@ class Prefilter {
   // and -1 for duplicate nodes.
   int unique_id_;
 
-  // Used for debugging, helps in tracking memory leaks.
-  int alloc_id_;
-
   DISALLOW_COPY_AND_ASSIGN(Prefilter);
 };
 
diff --git a/re2/prog.cc b/re2/prog.cc
index cb7e6e0..b5f48ef 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -6,7 +6,6 @@
 // Tested by compile_test.cc
 
 #include "util/util.h"
-#include "util/sparse_set.h"
 #include "re2/prog.h"
 #include "re2/stringpiece.h"
 
@@ -94,6 +93,7 @@ Prog::Prog()
   : anchor_start_(false),
     anchor_end_(false),
     reversed_(false),
+    did_flatten_(false),
     did_onepass_(false),
     start_(0),
     start_unanchored_(0),
@@ -131,7 +131,6 @@ static inline void AddToQueue(Workq* q, int id) {
 
 static string ProgToString(Prog* prog, Workq* q) {
   string s;
-
   for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
     int id = *i;
     Prog::Inst* ip = prog->inst(id);
@@ -143,6 +142,18 @@ static string ProgToString(Prog* prog, Workq* q) {
   return s;
 }
 
+static string FlattenedProgToString(Prog* prog, int start) {
+  string s;
+  for (int id = start; id < prog->size(); id++) {
+    Prog::Inst* ip = prog->inst(id);
+    if (ip->last())
+      StringAppendF(&s, "%d. %s\n", id, ip->Dump().c_str());
+    else
+      StringAppendF(&s, "%d+ %s\n", id, ip->Dump().c_str());
+  }
+  return s;
+}
+
 string Prog::Dump() {
   string map;
   if (false) {  // Debugging
@@ -155,12 +166,18 @@ string Prog::Dump() {
     StringAppendF(&map, "\n");
   }
 
+  if (did_flatten_)
+    return map + FlattenedProgToString(this, start_);
+
   Workq q(size_);
   AddToQueue(&q, start_);
   return map + ProgToString(this, &q);
 }
 
 string Prog::DumpUnanchored() {
+  if (did_flatten_)
+    return FlattenedProgToString(this, start_unanchored_);
+
   Workq q(size_);
   AddToQueue(&q, start_unanchored_);
   return ProgToString(this, &q);
@@ -337,5 +354,173 @@ void Prog::ComputeByteMap() {
   }
 }
 
-}  // namespace re2
+void Prog::Flatten() {
+  if (did_flatten_)
+    return;
+  did_flatten_ = true;
+
+  // Scratch structures. It's important that these are reused by EmitList()
+  // because we call it in a loop and it would thrash the heap otherwise.
+  SparseSet q(size());
+  vector<int> stk;
+  stk.reserve(size());
+
+  // First pass: Marks "roots".
+  // Builds the mapping from inst-ids to root-ids.
+  SparseArray<int> rootmap(size());
+  MarkRoots(&rootmap, &q, &stk);
+
+  // Second pass: Emits "lists". Remaps outs to root-ids.
+  // Builds the mapping from root-ids to flat-ids.
+  vector<int> flatmap(rootmap.size());
+  vector<Inst> flat;
+  flat.reserve(size());
+  for (SparseArray<int>::const_iterator i = rootmap.begin();
+       i != rootmap.end();
+       ++i) {
+    flatmap[i->value()] = flat.size();
+    EmitList(i->index(), &rootmap, &flat, &q, &stk);
+    flat.back().set_last();
+  }
+
+  // Third pass: Remaps outs to flat-ids.
+  for (int id = 0; id < static_cast<int>(flat.size()); id++) {
+    Inst* ip = &flat[id];
+    if (ip->opcode() != kInstAltMatch)  // handled in EmitList()
+      ip->set_out(flatmap[ip->out()]);
+  }
+
+  // Remap start_unanchored and start.
+  if (start_unanchored() == 0) {
+    DCHECK_EQ(start(), 0);
+  } else if (start_unanchored() == start()) {
+    set_start_unanchored(flatmap[1]);
+    set_start(flatmap[1]);
+  } else {
+    set_start_unanchored(flatmap[1]);
+    set_start(flatmap[2]);
+  }
+
+  // Finally, replace the old instructions with the new instructions.
+  size_ = flat.size();
+  delete[] inst_;
+  inst_ = new Inst[size_];
+  memmove(inst_, flat.data(), size_ * sizeof *inst_);
+}
+
+void Prog::MarkRoots(SparseArray<int>* rootmap,
+                     SparseSet* q, vector<int>* stk) {
+  // Mark the kInstFail instruction.
+  rootmap->set_new(0, rootmap->size());
+
+  // Mark the start_unanchored and start instructions.
+  if (!rootmap->has_index(start_unanchored()))
+    rootmap->set_new(start_unanchored(), rootmap->size());
+  if (!rootmap->has_index(start()))
+    rootmap->set_new(start(), rootmap->size());
+
+  q->clear();
+  stk->clear();
+  stk->push_back(start_unanchored());
+  while (!stk->empty()) {
+    int id = stk->back();
+    stk->pop_back();
+  Loop:
+    if (q->contains(id))
+      continue;
+    q->insert_new(id);
+
+    Inst* ip = inst(id);
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+        break;
+
+      case kInstAltMatch:
+      case kInstAlt:
+        stk->push_back(ip->out1());
+        id = ip->out();
+        goto Loop;
+
+      case kInstByteRange:
+      case kInstCapture:
+      case kInstEmptyWidth:
+        // Mark the out of this instruction.
+        if (!rootmap->has_index(ip->out()))
+          rootmap->set_new(ip->out(), rootmap->size());
+        id = ip->out();
+        goto Loop;
+
+      case kInstNop:
+        id = ip->out();
+        goto Loop;
 
+      case kInstMatch:
+      case kInstFail:
+        break;
+    }
+  }
+}
+
+void Prog::EmitList(int root, SparseArray<int>* rootmap, vector<Inst>* flat,
+                    SparseSet* q, vector<int>* stk) {
+  q->clear();
+  stk->clear();
+  stk->push_back(root);
+  while (!stk->empty()) {
+    int id = stk->back();
+    stk->pop_back();
+  Loop:
+    if (q->contains(id))
+      continue;
+    q->insert_new(id);
+
+    if (id != root && rootmap->has_index(id)) {
+      // We reached another "tree" via epsilon transition. Emit a kInstNop
+      // instruction so that the Prog does not become quadratically larger.
+      flat->emplace_back();
+      flat->back().set_opcode(kInstNop);
+      flat->back().set_out(rootmap->get_existing(id));
+      continue;
+    }
+
+    Inst* ip = inst(id);
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+        break;
+
+      case kInstAltMatch:
+        flat->emplace_back();
+        flat->back().set_opcode(kInstAltMatch);
+        flat->back().set_out(flat->size());
+        flat->back().out1_ = flat->size()+1;
+        // Fall through.
+
+      case kInstAlt:
+        stk->push_back(ip->out1());
+        id = ip->out();
+        goto Loop;
+
+      case kInstByteRange:
+      case kInstCapture:
+      case kInstEmptyWidth:
+        flat->emplace_back();
+        memmove(&flat->back(), ip, sizeof *ip);
+        flat->back().set_out(rootmap->get_existing(ip->out()));
+        break;
+
+      case kInstNop:
+        id = ip->out();
+        goto Loop;
+
+      case kInstMatch:
+      case kInstFail:
+        flat->emplace_back();
+        memmove(&flat->back(), ip, sizeof *ip);
+        break;
+    }
+  }
+}
+
+}  // namespace re2
diff --git a/re2/prog.h b/re2/prog.h
index 45e9f0b..4a4017c 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -11,6 +11,7 @@
 
 #include "util/util.h"
 #include "util/sparse_array.h"
+#include "util/sparse_set.h"
 #include "re2/re2.h"
 
 namespace re2 {
@@ -84,7 +85,11 @@ class Prog {
   // Single instruction in regexp program.
   class Inst {
    public:
-    Inst() : out_opcode_(0), out1_(0) { }
+    Inst() : out_opcode_(0), out1_(0) {}
+
+    // Copyable.
+    Inst(const Inst&) = default;
+    Inst& operator=(const Inst&) = default;
 
     // Constructors per opcode
     void InitAlt(uint32 out, uint32 out1);
@@ -98,17 +103,21 @@ class Prog {
     // Getters
     int id(Prog* p) { return static_cast<int>(this - p->inst_); }
     InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
-    int out()     { return out_opcode_>>3; }
-    int out1()    { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
+    int last()      { return (out_opcode_>>3)&1; }
+    int out()       { return out_opcode_>>4; }
+    int out1()      { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
     int cap()       { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
     int lo()        { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
     int hi()        { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
     int foldcase()  { DCHECK_EQ(opcode(), kInstByteRange); return foldcase_; }
     int match_id()  { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
     EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
-    bool greedy(Prog *p) {
+
+    bool greedy(Prog* p) {
       DCHECK_EQ(opcode(), kInstAltMatch);
-      return p->inst(out())->opcode() == kInstByteRange;
+      return p->inst(out())->opcode() == kInstByteRange ||
+             (p->inst(out())->opcode() == kInstNop &&
+              p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
     }
 
     // Does this inst (an kInstByteRange) match c?
@@ -123,23 +132,27 @@ class Prog {
     string Dump();
 
     // Maximum instruction id.
-    // (Must fit in out_opcode_, and PatchList steals another bit.)
+    // (Must fit in out_opcode_. PatchList/last steal another bit.)
     static const int kMaxInst = (1<<28) - 1;
 
    private:
     void set_opcode(InstOp opcode) {
-      out_opcode_ = (out()<<3) | opcode;
+      out_opcode_ = (out()<<4) | (last()<<3) | opcode;
+    }
+
+    void set_last() {
+      out_opcode_ = (out()<<4) | (1<<3) | opcode();
     }
 
     void set_out(int out) {
-      out_opcode_ = (out<<3) | opcode();
+      out_opcode_ = (out<<4) | (last()<<3) | opcode();
     }
 
     void set_out_opcode(int out, InstOp opcode) {
-      out_opcode_ = (out<<3) | opcode;
+      out_opcode_ = (out<<4) | (last()<<3) | opcode;
     }
 
-    uint32 out_opcode_;  // 29 bits of out, 3 (low) bits opcode
+    uint32 out_opcode_;  // 28 bits of out, 1 bit for last, 3 (low) bits opcode
     union {              // additional instruction arguments:
       uint32 out1_;      // opcode == kInstAlt
                          //   alternate next instruction
@@ -167,8 +180,6 @@ class Prog {
     friend class Compiler;
     friend struct PatchList;
     friend class Prog;
-
-    DISALLOW_COPY_AND_ASSIGN(Inst);
   };
 
   // Whether to anchor the search.
@@ -339,6 +350,20 @@ class Prog {
   static Prog* CompileSet(const RE2::Options& options, RE2::Anchor anchor,
                           Regexp* re);
 
+  // Flattens the Prog from "tree" form to "list" form. This is an in-place
+  // operation in the sense that the old instructions are lost.
+  void Flatten();
+
+  // Marks the "roots" in the Prog: the outs of kInstByteRange, kInstCapture
+  // and kInstEmptyWidth instructions.
+  void MarkRoots(SparseArray<int>* rootmap,
+                 SparseSet* q, vector<int>* stk);
+
+  // Emits one "list" via "tree" traversal from the given "root" instruction.
+  // The new instructions are appended to the given vector.
+  void EmitList(int root, SparseArray<int>* rootmap, vector<Inst>* flat,
+                SparseSet* q, vector<int>* stk);
+
  private:
   friend class Compiler;
 
@@ -347,6 +372,7 @@ class Prog {
   bool anchor_start_;       // regexp has explicit start anchor
   bool anchor_end_;         // regexp has explicit end anchor
   bool reversed_;           // whether program runs backward over input
+  bool did_flatten_;        // has Flatten been called?
   bool did_onepass_;        // has IsOnePass been called?
 
   int start_;               // entry point for program
diff --git a/re2/re2.cc b/re2/re2.cc
index b3e582f..492ffaf 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -52,22 +52,11 @@ RE2::Options::Options(RE2::CannedOptions opt)
     one_line_(false) {
 }
 
-// static empty things for use as const references.
-// To avoid global constructors, initialized on demand.
-GLOBAL_MUTEX(empty_mutex);
-static const string *empty_string;
-static const map<string, int> *empty_named_groups;
-static const map<int, string> *empty_group_names;
-
-static void InitEmpty() {
-  GLOBAL_MUTEX_LOCK(empty_mutex);
-  if (empty_string == NULL) {
-    empty_string = new string;
-    empty_named_groups = new map<string, int>;
-    empty_group_names = new map<int, string>;
-  }
-  GLOBAL_MUTEX_UNLOCK(empty_mutex);
-}
+// static empty objects for use as const references.
+// To avoid global constructors, allocated in RE2::Init().
+static const string* empty_string;
+static const map<string, int>* empty_named_groups;
+static const map<int, string>* empty_group_names;
 
 // Converts from Regexp error code to RE2 error code.
 // Maybe some day they will diverge.  In any event, this
@@ -174,19 +163,24 @@ int RE2::Options::ParseFlags() const {
 }
 
 void RE2::Init(const StringPiece& pattern, const Options& options) {
-  mutex_ = new Mutex;
+  static std::once_flag empty_once;
+  std::call_once(empty_once, []() {
+    empty_string = new string;
+    empty_named_groups = new map<string, int>;
+    empty_group_names = new map<int, string>;
+  });
+
   pattern_ = pattern.as_string();
   options_.Copy(options);
-  InitEmpty();
-  error_ = empty_string;
-  error_code_ = NoError;
-  suffix_regexp_ = NULL;
   entire_regexp_ = NULL;
+  suffix_regexp_ = NULL;
   prog_ = NULL;
   rprog_ = NULL;
+  error_ = empty_string;
+  error_code_ = NoError;
+  num_captures_ = -1;
   named_groups_ = NULL;
   group_names_ = NULL;
-  num_captures_ = -1;
 
   RegexpStatus status;
   entire_regexp_ = Regexp::Parse(
@@ -194,14 +188,13 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
     static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
     &status);
   if (entire_regexp_ == NULL) {
-    if (error_ == empty_string)
-      error_ = new string(status.Text());
     if (options_.log_errors()) {
       LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
                  << status.Text();
     }
-    error_arg_ = status.error_arg().as_string();
+    error_ = new string(status.Text());
     error_code_ = RegexpErrorToRE2(status.code());
+    error_arg_ = status.error_arg().as_string();
     return;
   }
 
@@ -235,17 +228,15 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
 
 // Returns rprog_, computing it if needed.
 re2::Prog* RE2::ReverseProg() const {
-  MutexLock l(mutex_);
-  if (rprog_ == NULL && error_ == empty_string) {
+  std::call_once(rprog_once_, [this]() {
     rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
     if (rprog_ == NULL) {
       if (options_.log_errors())
         LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
       error_ = new string("pattern too large - reverse compile failed");
       error_code_ = RE2::ErrorPatternTooLarge;
-      return NULL;
     }
-  }
+  });
   return rprog_;
 }
 
@@ -254,7 +245,6 @@ RE2::~RE2() {
     suffix_regexp_->Decref();
   if (entire_regexp_)
     entire_regexp_->Decref();
-  delete mutex_;
   delete prog_;
   delete rprog_;
   if (error_ != empty_string)
@@ -291,37 +281,32 @@ int RE2::ProgramFanout(map<int, int>* histogram) const {
 // Returns num_captures_, computing it if needed, or -1 if the
 // regexp wasn't valid on construction.
 int RE2::NumberOfCapturingGroups() const {
-  MutexLock l(mutex_);
-  if (suffix_regexp_ == NULL)
-    return -1;
-  if (num_captures_ == -1)
-    num_captures_ = suffix_regexp_->NumCaptures();
+  std::call_once(num_captures_once_, [this]() {
+    if (suffix_regexp_ != NULL)
+      num_captures_ = suffix_regexp_->NumCaptures();
+  });
   return num_captures_;
 }
 
 // Returns named_groups_, computing it if needed.
 const map<string, int>& RE2::NamedCapturingGroups() const {
-  MutexLock l(mutex_);
-  if (!ok())
-    return *empty_named_groups;
-  if (named_groups_ == NULL) {
-    named_groups_ = suffix_regexp_->NamedCaptures();
+  std::call_once(named_groups_once_, [this]() {
+    if (suffix_regexp_ != NULL)
+      named_groups_ = suffix_regexp_->NamedCaptures();
     if (named_groups_ == NULL)
       named_groups_ = empty_named_groups;
-  }
+  });
   return *named_groups_;
 }
 
 // Returns group_names_, computing it if needed.
 const map<int, string>& RE2::CapturingGroupNames() const {
-  MutexLock l(mutex_);
-  if (!ok())
-    return *empty_group_names;
-  if (group_names_ == NULL) {
-    group_names_ = suffix_regexp_->CaptureNames();
+  std::call_once(group_names_once_, [this]() {
+    if (suffix_regexp_ != NULL)
+      group_names_ = suffix_regexp_->CaptureNames();
     if (group_names_ == NULL)
       group_names_ = empty_group_names;
-  }
+  });
   return *group_names_;
 }
 
diff --git a/re2/re2.h b/re2/re2.h
index 5b955d0..4a8c5c8 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -181,6 +181,7 @@
 
 #include <stdint.h>
 #include <map>
+#include <mutex>
 #include <string>
 #include "re2/stringpiece.h"
 #include "re2/variadic_function.h"
@@ -193,25 +194,9 @@ namespace re2 {
 
 using std::string;
 using std::map;
-class Mutex;
 class Prog;
 class Regexp;
 
-// The following enum should be used only as a constructor argument to indicate
-// that the variable has static storage class, and that the constructor should
-// do nothing to its state.  It indicates to the reader that it is legal to
-// declare a static instance of the class, provided the constructor is given
-// the LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
-// static variable that has a constructor or a destructor because invocation
-// order is undefined.  However, IF the type can be initialized by filling with
-// zeroes (which the loader does for static variables), AND the type's
-// destructor does nothing to the storage, then a constructor for static
-// initialization can be declared as
-//       explicit MyClass(LinkerInitialized x) {}
-// and invoked as
-//       static MyClass my_variable_name(LINKER_INITIALIZED);
-enum LinkerInitialized { LINKER_INITIALIZED };
-
 // Interface for regular expression matching.  Also corresponds to a
 // pre-compiled regular expression.  An "RE2" object is safe for
 // concurrent use by multiple threads.
@@ -723,21 +708,21 @@ class RE2 {
 
   re2::Prog* ReverseProg() const;
 
-  mutable Mutex*           mutex_;
-  string                   pattern_;       // string regular expression
-  Options                  options_;       // option flags
+  string        pattern_;          // string regular expression
+  Options       options_;          // option flags
   string        prefix_;           // required prefix (before regexp_)
   bool          prefix_foldcase_;  // prefix is ASCII case-insensitive
   re2::Regexp*  entire_regexp_;    // parsed regular expression
   re2::Regexp*  suffix_regexp_;    // parsed regular expression, prefix removed
   re2::Prog*    prog_;             // compiled program for regexp
-  mutable re2::Prog* rprog_;       // reverse program for regexp
-  bool                     is_one_pass_;   // can use prog_->SearchOnePass?
-  mutable const string*    error_;         // Error indicator
-                                           // (or points to empty string)
-  mutable ErrorCode        error_code_;    // Error code
-  mutable string           error_arg_;     // Fragment of regexp showing error
-  mutable int              num_captures_;  // Number of capturing groups
+  bool          is_one_pass_;      // can use prog_->SearchOnePass?
+
+  mutable re2::Prog*     rprog_;         // reverse program for regexp
+  mutable const string*  error_;         // Error indicator
+                                         // (or points to empty string)
+  mutable ErrorCode      error_code_;    // Error code
+  mutable string         error_arg_;     // Fragment of regexp showing error
+  mutable int            num_captures_;  // Number of capturing groups
 
   // Map from capture names to indices
   mutable const map<string, int>* named_groups_;
@@ -745,6 +730,12 @@ class RE2 {
   // Map from capture indices to names
   mutable const map<int, string>* group_names_;
 
+  // Onces for lazy computations.
+  mutable std::once_flag rprog_once_;
+  mutable std::once_flag num_captures_once_;
+  mutable std::once_flag named_groups_once_;
+  mutable std::once_flag group_names_once_;
+
   //DISALLOW_COPY_AND_ASSIGN(RE2);
   RE2(const RE2&);
   void operator=(const RE2&);
diff --git a/re2/regexp.cc b/re2/regexp.cc
index 99e72e5..7b1b174 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -60,30 +60,29 @@ bool Regexp::QuickDestroy() {
   return false;
 }
 
-static map<Regexp*, int> *ref_map;
-GLOBAL_MUTEX(ref_mutex);
+// Lazily allocated.
+static Mutex* ref_mutex;
+static map<Regexp*, int>* ref_map;
 
 int Regexp::Ref() {
   if (ref_ < kMaxRef)
     return ref_;
 
-  GLOBAL_MUTEX_LOCK(ref_mutex);
-  int r = 0;
-  if (ref_map != NULL) {
-    r = (*ref_map)[this];
-  }
-  GLOBAL_MUTEX_UNLOCK(ref_mutex);
-  return r;
+  MutexLock l(ref_mutex);
+  return (*ref_map)[this];
 }
 
 // Increments reference count, returns object as convenience.
 Regexp* Regexp::Incref() {
   if (ref_ >= kMaxRef-1) {
-    // Store ref count in overflow map.
-    GLOBAL_MUTEX_LOCK(ref_mutex);
-    if (ref_map == NULL) {
+    static std::once_flag ref_once;
+    std::call_once(ref_once, []() {
+      ref_mutex = new Mutex;
       ref_map = new map<Regexp*, int>;
-    }
+    });
+
+    // Store ref count in overflow map.
+    MutexLock l(ref_mutex);
     if (ref_ == kMaxRef) {
       // already overflowed
       (*ref_map)[this]++;
@@ -92,7 +91,6 @@ Regexp* Regexp::Incref() {
       (*ref_map)[this] = kMaxRef;
       ref_ = kMaxRef;
     }
-    GLOBAL_MUTEX_UNLOCK(ref_mutex);
     return this;
   }
 
@@ -104,7 +102,7 @@ Regexp* Regexp::Incref() {
 void Regexp::Decref() {
   if (ref_ == kMaxRef) {
     // Ref count is stored in overflow map.
-    GLOBAL_MUTEX_LOCK(ref_mutex);
+    MutexLock l(ref_mutex);
     int r = (*ref_map)[this] - 1;
     if (r < kMaxRef) {
       ref_ = static_cast<uint16>(r);
@@ -112,7 +110,6 @@ void Regexp::Decref() {
     } else {
       (*ref_map)[this] = r;
     }
-    GLOBAL_MUTEX_UNLOCK(ref_mutex);
     return;
   }
   ref_--;
diff --git a/re2/regexp.h b/re2/regexp.h
index 5f222b7..54720eb 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -185,10 +185,10 @@ class RegexpStatus {
   RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
   ~RegexpStatus() { delete tmp_; }
 
-  void set_code(enum RegexpStatusCode code) { code_ = code; }
+  void set_code(RegexpStatusCode code) { code_ = code; }
   void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
   void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }
-  enum RegexpStatusCode code() const { return code_; }
+  RegexpStatusCode code() const { return code_; }
   const StringPiece& error_arg() const { return error_arg_; }
   bool ok() const { return code() == kRegexpSuccess; }
 
@@ -197,14 +197,14 @@ class RegexpStatus {
 
   // Returns text equivalent of code, e.g.:
   //   "Bad character class"
-  static string CodeText(enum RegexpStatusCode code);
+  static string CodeText(RegexpStatusCode code);
 
   // Returns text describing error, e.g.:
   //   "Bad character class: [z-a]"
   string Text() const;
 
  private:
-  enum RegexpStatusCode code_;  // Kind of error
+  RegexpStatusCode code_;  // Kind of error
   StringPiece error_arg_;       // Piece of regexp containing syntax error.
   string* tmp_;                 // Temporary storage, possibly where error_arg_ is.
 
@@ -314,7 +314,7 @@ class Regexp {
   RegexpOp op() { return static_cast<RegexpOp>(op_); }
   int nsub() { return nsub_; }
   bool simple() { return simple_ != 0; }
-  enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
+  ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
   int Ref();  // For testing.
 
   Regexp** sub() {
@@ -628,8 +628,6 @@ inline Regexp::ParseFlags operator~(Regexp::ParseFlags a)
   return static_cast<Regexp::ParseFlags>(~static_cast<int>(a));
 }
 
-
-
 }  // namespace re2
 
 #endif  // RE2_REGEXP_H__
diff --git a/re2/set.cc b/re2/set.cc
index a1a84ba..0e626a2 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -96,7 +96,8 @@ bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
     LOG(DFATAL) << "RE2::Set::Match without Compile";
     return false;
   }
-  v->clear();
+  if (v != NULL)
+    v->clear();
   bool failed;
   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
                               Prog::kManyMatch, NULL, &failed, v);
@@ -105,7 +106,7 @@ bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
 
   if (ret == false)
     return false;
-  if (v->size() == 0) {
+  if (v != NULL && v->empty()) {
     LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
     return false;
   }
diff --git a/re2/set.h b/re2/set.h
index e48d8cb..5bcdb70 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -36,7 +36,7 @@ class RE2::Set {
   bool Compile();
 
   // Match returns true if text matches any of the regexps in the set.
-  // If so, it fills v with the indices of the matching regexps.
+  // If so, it fills v (if not NULL) with the indices of the matching regexps.
   bool Match(const StringPiece& text, vector<int>* v) const;
 
  private:
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index 2141c53..95c14c2 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -55,10 +55,14 @@ class Backtracker {
               StringPiece* submatch, int nsubmatch);
 
  private:
-  // Explores from instruction ip at string position p looking for a match.
+  // Explores from instruction id at string position p looking for a match.
   // Returns true if found (so that caller can stop trying other possibilities).
   bool Visit(int id, const char* p);
 
+  // Tries instruction id at string position p.
+  // Returns true if a match is found.
+  bool Try(int id, const char* p);
+
   // Search parameters
   Prog* prog_;              // program being run
   StringPiece text_;        // text being searched
@@ -143,7 +147,7 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
   return false;
 }
 
-// Explores from instruction ip at string position p looking for a match.
+// Explores from instruction id at string position p looking for a match.
 // Return true if found (so that caller can stop trying other possibilities).
 bool Backtracker::Visit(int id, const char* p) {
   // Check bitmap.  If we've already explored from here,
@@ -156,6 +160,20 @@ bool Backtracker::Visit(int id, const char* p) {
     return false;
   visited_[n/32] |= 1 << (n&31);
 
+  Prog::Inst* ip = prog_->inst(id);
+  if (Try(id, p)) {
+    if (longest_ && !ip->last())
+      Visit(id+1, p);
+    return true;
+  }
+  if (!ip->last())
+    return Visit(id+1, p);
+  return false;
+}
+
+// Tries instruction id at string position p.
+// Returns true if a match is found.
+bool Backtracker::Try(int id, const char* p) {
   // Pick out byte at current position.  If at end of string,
   // have to explore in hope of finishing a match.  Use impossible byte -1.
   int c = -1;
@@ -168,15 +186,9 @@ bool Backtracker::Visit(int id, const char* p) {
       LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
       return false;  // not reached
 
-    case kInstAlt:
     case kInstAltMatch:
-      // Try both possible next states: out is preferred to out1.
-      if (Visit(ip->out(), p)) {
-        if (longest_)
-          Visit(ip->out1(), p);
-        return true;
-      }
-      return Visit(ip->out1(), p);
+      // Ignored.
+      return false;
 
     case kInstByteRange:
       if (ip->Matches(c))
diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
index dee90a3..05e7ea8 100644
--- a/re2/testing/compile_test.cc
+++ b/re2/testing/compile_test.cc
@@ -27,82 +27,89 @@ struct Test {
 
 static Test tests[] = {
   { "a",
-    "1. byte [61-61] -> 2\n"
-    "2. match! 0\n" },
+    "3. byte [61-61] -> 4\n"
+    "4. match! 0\n" },
   { "ab",
-    "1. byte [61-61] -> 2\n"
-    "2. byte [62-62] -> 3\n"
-    "3. match! 0\n" },
+    "3. byte [61-61] -> 4\n"
+    "4. byte [62-62] -> 5\n"
+    "5. match! 0\n" },
   { "a|c",
-    "3. alt -> 1 | 2\n"
-    "1. byte [61-61] -> 4\n"
-    "2. byte [63-63] -> 4\n"
-    "4. match! 0\n" },
+    "3+ byte [61-61] -> 5\n"
+    "4. byte [63-63] -> 5\n"
+    "5. match! 0\n" },
   { "a|b",
-    "1. byte [61-62] -> 2\n"
-    "2. match! 0\n" },
+    "3. byte [61-62] -> 4\n"
+    "4. match! 0\n" },
   { "[ab]",
-    "1. byte [61-62] -> 2\n"
-    "2. match! 0\n" },
+    "3. byte [61-62] -> 4\n"
+    "4. match! 0\n" },
   { "a+",
-    "1. byte [61-61] -> 2\n"
-    "2. alt -> 1 | 3\n"
-    "3. match! 0\n" },
+    "3. byte [61-61] -> 4\n"
+    "4+ nop -> 3\n"
+    "5. match! 0\n" },
   { "a+?",
-    "1. byte [61-61] -> 2\n"
-    "2. alt -> 3 | 1\n"
-    "3. match! 0\n" },
+    "3. byte [61-61] -> 4\n"
+    "4+ match! 0\n"
+    "5. nop -> 3\n" },
   { "a*",
-    "2. alt -> 1 | 3\n"
-    "1. byte [61-61] -> 2\n"
-    "3. match! 0\n" },
+    "3+ byte [61-61] -> 3\n"
+    "4. match! 0\n" },
   { "a*?",
-    "2. alt -> 3 | 1\n"
-    "3. match! 0\n"
-    "1. byte [61-61] -> 2\n" },
+    "3+ match! 0\n"
+    "4. byte [61-61] -> 3\n" },
   { "a?",
-    "2. alt -> 1 | 3\n"
-    "1. byte [61-61] -> 3\n"
-    "3. match! 0\n" },
+    "3+ byte [61-61] -> 5\n"
+    "4. nop -> 5\n"
+    "5. match! 0\n" },
   { "a??",
-    "2. alt -> 3 | 1\n"
-    "3. match! 0\n"
-    "1. byte [61-61] -> 3\n" },
+    "3+ nop -> 5\n"
+    "4. byte [61-61] -> 5\n"
+    "5. match! 0\n" },
   { "a{4}",
-    "1. byte [61-61] -> 2\n"
-    "2. byte [61-61] -> 3\n"
     "3. byte [61-61] -> 4\n"
     "4. byte [61-61] -> 5\n"
-    "5. match! 0\n" },
+    "5. byte [61-61] -> 6\n"
+    "6. byte [61-61] -> 7\n"
+    "7. match! 0\n" },
   { "(a)",
-    "2. capture 2 -> 1\n"
-    "1. byte [61-61] -> 3\n"
-    "3. capture 3 -> 4\n"
-    "4. match! 0\n" },
+    "3. capture 2 -> 4\n"
+    "4. byte [61-61] -> 5\n"
+    "5. capture 3 -> 6\n"
+    "6. match! 0\n" },
   { "(?:a)",
-    "1. byte [61-61] -> 2\n"
-    "2. match! 0\n" },
+    "3. byte [61-61] -> 4\n"
+    "4. match! 0\n" },
   { "",
-    "2. match! 0\n" },
+    "3. match! 0\n" },
   { ".",
-    "3. alt -> 1 | 2\n"
-    "1. byte [00-09] -> 4\n"
-    "2. byte [0b-ff] -> 4\n"
-    "4. match! 0\n" },
+    "3+ byte [00-09] -> 5\n"
+    "4. byte [0b-ff] -> 5\n"
+    "5. match! 0\n" },
   { "[^ab]",
-    "5. alt -> 3 | 4\n"
-    "3. alt -> 1 | 2\n"
-    "4. byte [63-ff] -> 6\n"
-    "1. byte [00-09] -> 6\n"
-    "2. byte [0b-60] -> 6\n"
+    "3+ byte [00-09] -> 6\n"
+    "4+ byte [0b-60] -> 6\n"
+    "5. byte [63-ff] -> 6\n"
     "6. match! 0\n" },
   { "[Aa]",
-    "1. byte/i [61-61] -> 2\n"
-    "2. match! 0\n" },
+    "3. byte/i [61-61] -> 4\n"
+    "4. match! 0\n" },
+  { "\\C+",
+    "3. byte [00-ff] -> 4\n"
+    "4+ altmatch -> 5 | 6\n"
+    "5+ nop -> 3\n"
+    "6. match! 0\n" },
+  { "\\C*",
+    "3+ altmatch -> 4 | 5\n"
+    "4+ byte [00-ff] -> 3\n"
+    "5. match! 0\n" },
+  { "\\C?",
+    "3+ byte [00-ff] -> 5\n"
+    "4. nop -> 5\n"
+    "5. match! 0\n" },
   // Issue 20992936
   { "[[-`]",
-    "1. byte [5b-60] -> 2\n"
-    "2. match! 0\n" },
+    "3. byte [5b-60] -> 4\n"
+    "4. match! 0\n" },
 };
 
 TEST(TestRegexpCompileToProg, Simple) {
@@ -172,6 +179,18 @@ TEST(TestCompile, ByteRanges) {
   re->Decref();
 }
 
+TEST(TestCompile, InsufficientMemory) {
+  Regexp* re = Regexp::Parse(
+      "^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
+      Regexp::LikePerl, NULL);
+  EXPECT_TRUE(re != NULL);
+  Prog* prog = re->CompileToProg(920);
+  // If the memory budget has been exhausted, compilation should fail
+  // and return NULL instead of trying to do anything with NoMatch().
+  EXPECT_TRUE(prog == NULL);
+  re->Decref();
+}
+
 static void Dump(StringPiece pattern, Regexp::ParseFlags flags,
                  string* forward, string* reverse) {
   Regexp* re = Regexp::Parse(pattern, flags, NULL);
@@ -201,60 +220,51 @@ TEST(TestCompile, Bug26705922) {
   string forward, reverse;
 
   Dump("[\\x{10000}\\x{10010}]", Regexp::LikePerl, &forward, &reverse);
-  EXPECT_EQ("4. byte [f0-f0] -> 3\n"
-            "3. byte [90-90] -> 2\n"
-            "2. byte [80-80] -> 6\n"
-            "6. alt -> 1 | 5\n"
-            "1. byte [80-80] -> 7\n"
-            "5. byte [90-90] -> 7\n"
-            "7. match! 0\n",
+  EXPECT_EQ("3. byte [f0-f0] -> 4\n"
+            "4. byte [90-90] -> 5\n"
+            "5. byte [80-80] -> 6\n"
+            "6+ byte [80-80] -> 8\n"
+            "7. byte [90-90] -> 8\n"
+            "8. match! 0\n",
             forward);
-  EXPECT_EQ("6. alt -> 4 | 5\n"
-            "4. byte [80-80] -> 3\n"
-            "5. byte [90-90] -> 3\n"
-            "3. byte [80-80] -> 2\n"
-            "2. byte [90-90] -> 1\n"
-            "1. byte [f0-f0] -> 7\n"
-            "7. match! 0\n",
+  EXPECT_EQ("3+ byte [80-80] -> 5\n"
+            "4. byte [90-90] -> 5\n"
+            "5. byte [80-80] -> 6\n"
+            "6. byte [90-90] -> 7\n"
+            "7. byte [f0-f0] -> 8\n"
+            "8. match! 0\n",
             reverse);
 
   Dump("[\\x{8000}-\\x{10FFF}]", Regexp::LikePerl, &forward, &reverse);
-  EXPECT_EQ("6. alt -> 3 | 5\n"
-            "3. byte [e8-ef] -> 2\n"
-            "5. byte [f0-f0] -> 4\n"
-            "2. byte [80-bf] -> 1\n"
-            "4. byte [90-90] -> 2\n"
-            "1. byte [80-bf] -> 7\n"
-            "7. match! 0\n",
+  EXPECT_EQ("3+ byte [e8-ef] -> 5\n"
+            "4. byte [f0-f0] -> 8\n"
+            "5. byte [80-bf] -> 6\n"
+            "6. byte [80-bf] -> 7\n"
+            "7. match! 0\n"
+            "8. byte [90-90] -> 5\n",
             forward);
-  EXPECT_EQ("3. byte [80-bf] -> 2\n"
-            "2. byte [80-bf] -> 6\n"
-            "6. alt -> 1 | 5\n"
-            "1. byte [e8-ef] -> 7\n"
-            "5. byte [90-90] -> 4\n"
+  EXPECT_EQ("3. byte [80-bf] -> 4\n"
+            "4. byte [80-bf] -> 5\n"
+            "5+ byte [e8-ef] -> 7\n"
+            "6. byte [90-90] -> 8\n"
             "7. match! 0\n"
-            "4. byte [f0-f0] -> 7\n",
+            "8. byte [f0-f0] -> 7\n",
             reverse);
 
   Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
-  EXPECT_EQ("2. byte [80-bf] -> 8\n"
-            "8. alt -> 5 | 7\n"
-            "5. alt -> 1 | 4\n"
-            "7. byte [80-bf] -> 17\n"
-            "1. byte [c2-df] -> 18\n"
-            "4. byte [a0-bf] -> 3\n"
-            "17. alt -> 14 | 16\n"
-            "18. match! 0\n"
-            "3. byte [e0-e0] -> 18\n"
-            "14. alt -> 11 | 13\n"
-            "16. byte [80-8f] -> 15\n"
-            "11. alt -> 6 | 10\n"
-            "13. byte [80-bf] -> 12\n"
-            "15. byte [f4-f4] -> 18\n"
-            "6. byte [e1-ef] -> 18\n"
-            "10. byte [90-bf] -> 9\n"
-            "12. byte [f1-f3] -> 18\n"
-            "9. byte [f0-f0] -> 18\n",
+  EXPECT_EQ("3. byte [80-bf] -> 4\n"
+            "4+ byte [c2-df] -> 7\n"
+            "5+ byte [a0-bf] -> 8\n"
+            "6. byte [80-bf] -> 9\n"
+            "7. match! 0\n"
+            "8. byte [e0-e0] -> 7\n"
+            "9+ byte [e1-ef] -> 7\n"
+            "10+ byte [90-bf] -> 13\n"
+            "11+ byte [80-bf] -> 14\n"
+            "12. byte [80-8f] -> 15\n"
+            "13. byte [f0-f0] -> 7\n"
+            "14. byte [f1-f3] -> 7\n"
+            "15. byte [f4-f4] -> 7\n",
             reverse);
 }
 
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index 5a32d92..81d5fef 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -879,11 +879,16 @@ TEST(RE2, FloatingPointFullMatchTypes) {
     // number, since C does not guarantee to get the correctly
     // rounded answer for strtod and strtof unless the input is
     // short.
+    //
+    // This is known to fail on Cygwin and MinGW due to a broken
+    // implementation of strtof(3). Sigh.
+#if !defined(__CYGWIN__) && !defined(__MINGW32__)
     CHECK(RE2::FullMatch("0.1", "(.*)", &v));
     CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
     CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
     CHECK_EQ(v, 6700000000081920.1f)
       << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
+#endif
   }
   {
     double v;
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 8d80b4c..8318e28 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -349,6 +349,51 @@ BENCHMARK_RANGE(Search_Success1_Cached_PCRE,     8, 16<<20)->ThreadRange(1, NumC
 BENCHMARK_RANGE(Search_Success1_Cached_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
 BENCHMARK_RANGE(Search_Success1_Cached_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
 
+// Benchmark: AltMatch optimisation (just to verify that it works)
+// Note that OnePass doesn't implement it!
+
+void SearchAltMatch(int iters, int nbytes, SearchImpl* search) {
+  StopBenchmarkTiming();
+  string s;
+  MakeText(&s, nbytes);
+  BenchmarkMemoryUsage();
+  StartBenchmarkTiming();
+  search(iters, "\\C*", s, Prog::kAnchored, true);
+  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
+}
+
+void Search_AltMatch_DFA(int i, int n)      { SearchAltMatch(i, n, SearchDFA); }
+void Search_AltMatch_NFA(int i, int n)      { SearchAltMatch(i, n, SearchNFA); }
+void Search_AltMatch_OnePass(int i, int n)  { SearchAltMatch(i, n, SearchOnePass); }
+void Search_AltMatch_BitState(int i, int n) { SearchAltMatch(i, n, SearchBitState); }
+void Search_AltMatch_PCRE(int i, int n)     { SearchAltMatch(i, n, SearchPCRE); }
+void Search_AltMatch_RE2(int i, int n)      { SearchAltMatch(i, n, SearchRE2); }
+
+BENCHMARK_RANGE(Search_AltMatch_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_OnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_AltMatch_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_AltMatch_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_AltMatch_CachedDFA(int i, int n)      { SearchAltMatch(i, n, SearchCachedDFA); }
+void Search_AltMatch_CachedNFA(int i, int n)      { SearchAltMatch(i, n, SearchCachedNFA); }
+void Search_AltMatch_CachedOnePass(int i, int n)  { SearchAltMatch(i, n, SearchCachedOnePass); }
+void Search_AltMatch_CachedBitState(int i, int n) { SearchAltMatch(i, n, SearchCachedBitState); }
+void Search_AltMatch_CachedPCRE(int i, int n)     { SearchAltMatch(i, n, SearchCachedPCRE); }
+void Search_AltMatch_CachedRE2(int i, int n)      { SearchAltMatch(i, n, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_AltMatch_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_CachedOnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_AltMatch_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_AltMatch_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+
 // Benchmark: use regexp to find phone number.
 
 void SearchDigits(int iters, SearchImpl* search) {
diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc
index 3ab2ae3..08e5b4c 100644
--- a/re2/testing/search_test.cc
+++ b/re2/testing/search_test.cc
@@ -264,7 +264,7 @@ RegexpTest simple_tests[] = {
   { "[A-Z]+", "aAzZ" },
   { "[^\\\\]+", "Aa\\" },
   { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
-  
+
   // Anchoring.  (^abc in aabcdef was a former bug)
   // The tester checks for a match in the text and
   // subpieces of the text with a byte removed on either side.
@@ -297,7 +297,12 @@ RegexpTest simple_tests[] = {
   { "a", "a" },
   { "ab*", "a" },
   { "a\\C*", "a" },
-  
+  { "a\\C+", "a" },
+  { "a\\C?", "a" },
+  { "a\\C*?", "a" },
+  { "a\\C+?", "a" },
+  { "a\\C??", "a" },
+
   // Former bugs.
   { "a\\C*|ba\\C", "baba" },
 };
diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc
index 4e267ae..c613d6a 100644
--- a/re2/testing/set_test.cc
+++ b/re2/testing/set_test.cc
@@ -18,21 +18,22 @@ TEST(Set, Unanchored) {
   CHECK_EQ(s.Add("foo", NULL), 0);
   CHECK_EQ(s.Add("(", NULL), -1);
   CHECK_EQ(s.Add("bar", NULL), 1);
-
   CHECK_EQ(s.Compile(), true);
 
+  CHECK_EQ(s.Match("foobar", NULL), true);
+  CHECK_EQ(s.Match("fooba", NULL), true);
+  CHECK_EQ(s.Match("oobar", NULL), true);
+
   vector<int> v;
   CHECK_EQ(s.Match("foobar", &v), true);
   CHECK_EQ(v.size(), 2);
   CHECK_EQ(v[0], 0);
   CHECK_EQ(v[1], 1);
 
-  v.clear();
   CHECK_EQ(s.Match("fooba", &v), true);
   CHECK_EQ(v.size(), 1);
   CHECK_EQ(v[0], 0);
 
-  v.clear();
   CHECK_EQ(s.Match("oobar", &v), true);
   CHECK_EQ(v.size(), 1);
   CHECK_EQ(v[0], 1);
@@ -44,27 +45,28 @@ TEST(Set, UnanchoredFactored) {
   CHECK_EQ(s.Add("foo", NULL), 0);
   CHECK_EQ(s.Add("(", NULL), -1);
   CHECK_EQ(s.Add("foobar", NULL), 1);
-
   CHECK_EQ(s.Compile(), true);
 
+  CHECK_EQ(s.Match("foobar", NULL), true);
+  CHECK_EQ(s.Match("obarfoobaroo", NULL), true);
+  CHECK_EQ(s.Match("fooba", NULL), true);
+  CHECK_EQ(s.Match("oobar", NULL), false);
+
   vector<int> v;
   CHECK_EQ(s.Match("foobar", &v), true);
   CHECK_EQ(v.size(), 2);
   CHECK_EQ(v[0], 0);
   CHECK_EQ(v[1], 1);
 
-  v.clear();
   CHECK_EQ(s.Match("obarfoobaroo", &v), true);
   CHECK_EQ(v.size(), 2);
   CHECK_EQ(v[0], 0);
   CHECK_EQ(v[1], 1);
 
-  v.clear();
   CHECK_EQ(s.Match("fooba", &v), true);
   CHECK_EQ(v.size(), 1);
   CHECK_EQ(v[0], 0);
 
-  v.clear();
   CHECK_EQ(s.Match("oobar", &v), false);
   CHECK_EQ(v.size(), 0);
 }
@@ -75,6 +77,8 @@ TEST(Set, UnanchoredDollar) {
   CHECK_EQ(s.Add("foo$", NULL), 0);
   CHECK_EQ(s.Compile(), true);
 
+  CHECK_EQ(s.Match("foo", NULL), true);
+
   vector<int> v;
   CHECK_EQ(s.Match("foo", &v), true);
   CHECK_EQ(v.size(), 1);
@@ -87,9 +91,14 @@ TEST(Set, Anchored) {
   CHECK_EQ(s.Add("foo", NULL), 0);
   CHECK_EQ(s.Add("(", NULL), -1);
   CHECK_EQ(s.Add("bar", NULL), 1);
-
   CHECK_EQ(s.Compile(), true);
 
+  CHECK_EQ(s.Match("foobar", NULL), false);
+  CHECK_EQ(s.Match("fooba", NULL), false);
+  CHECK_EQ(s.Match("oobar", NULL), false);
+  CHECK_EQ(s.Match("foo", NULL), true);
+  CHECK_EQ(s.Match("bar", NULL), true);
+
   vector<int> v;
   CHECK_EQ(s.Match("foobar", &v), false);
   CHECK_EQ(v.size(), 0);
@@ -114,11 +123,13 @@ TEST(Set, EmptyUnanchored) {
 
   CHECK_EQ(s.Compile(), true);
 
+  CHECK_EQ(s.Match("", NULL), false);
+  CHECK_EQ(s.Match("foobar", NULL), false);
+
   vector<int> v;
   CHECK_EQ(s.Match("", &v), false);
   CHECK_EQ(v.size(), 0);
 
-  v.clear();
   CHECK_EQ(s.Match("foobar", &v), false);
   CHECK_EQ(v.size(), 0);
 }
@@ -128,13 +139,38 @@ TEST(Set, EmptyAnchored) {
 
   CHECK_EQ(s.Compile(), true);
 
+  CHECK_EQ(s.Match("", NULL), false);
+  CHECK_EQ(s.Match("foobar", NULL), false);
+
   vector<int> v;
   CHECK_EQ(s.Match("", &v), false);
   CHECK_EQ(v.size(), 0);
 
-  v.clear();
   CHECK_EQ(s.Match("foobar", &v), false);
   CHECK_EQ(v.size(), 0);
 }
 
+TEST(Set, Prefix) {
+  RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+  CHECK_EQ(s.Add("/prefix/\\d*", NULL), 0);
+  CHECK_EQ(s.Compile(), true);
+
+  CHECK_EQ(s.Match("/prefix", NULL), false);
+  CHECK_EQ(s.Match("/prefix/", NULL), true);
+  CHECK_EQ(s.Match("/prefix/42", NULL), true);
+
+  vector<int> v;
+  CHECK_EQ(s.Match("/prefix", &v), false);
+  CHECK_EQ(v.size(), 0);
+
+  CHECK_EQ(s.Match("/prefix/", &v), true);
+  CHECK_EQ(v.size(), 1);
+  CHECK_EQ(v[0], 0);
+
+  CHECK_EQ(s.Match("/prefix/42", &v), true);
+  CHECK_EQ(v.size(), 1);
+  CHECK_EQ(v[0], 0);
+}
+
 }  // namespace re2
diff --git a/util/mutex.h b/util/mutex.h
index b479e48..8a13226 100644
--- a/util/mutex.h
+++ b/util/mutex.h
@@ -208,27 +208,6 @@ class WriterMutexLock {
 #define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
 #define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
 
-// Provide safe way to declare and use global, linker-initialized mutex. Sigh.
-#if HAVE_PTHREAD
-
-#define GLOBAL_MUTEX(name) \
-	static pthread_mutex_t (name) = PTHREAD_MUTEX_INITIALIZER
-#define GLOBAL_MUTEX_LOCK(name) \
-	pthread_mutex_lock(&(name))
-#define GLOBAL_MUTEX_UNLOCK(name) \
-	pthread_mutex_unlock(&(name))
-
-#else
-
-#define GLOBAL_MUTEX(name) \
-	static Mutex name
-#define GLOBAL_MUTEX_LOCK(name) \
-	name.Lock()
-#define GLOBAL_MUTEX_UNLOCK(name) \
-	name.Unlock()
-
-#endif
-
 }  // namespace re2
 
 #endif  /* #define RE2_UTIL_MUTEX_H_ */
diff --git a/util/util.h b/util/util.h
index 8c5d922..7dfab93 100644
--- a/util/util.h
+++ b/util/util.h
@@ -31,6 +31,7 @@
 #include <utility>
 #include <set>
 #include <atomic>
+#include <mutex>        // For std::call_once
 
 // Use std names.
 using std::set;
author	DongHun Kwak <dh0128.kwak@samsung.com>	2016-11-21 16:57:37 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2016-11-21 16:57:37 +0900
commit	6c0bc52e1c8854d1a96810f0fe9849d83a7c409c (patch)
tree	fc11f96d6acda48ef0b297817c5f6110aa81351b
parent	4dfe393027cfcfe844a8c8b97209a714904308c4 (diff)
download	re2-6c0bc52e1c8854d1a96810f0fe9849d83a7c409c.tar.gz re2-6c0bc52e1c8854d1a96810f0fe9849d83a7c409c.tar.bz2 re2-6c0bc52e1c8854d1a96810f0fe9849d83a7c409c.zip